wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -1,77 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'wukong'
4
-
5
- settings = Wukong::Local::Configuration
6
- settings.use(:commandline)
7
-
8
- def settings.usage
9
- "usage: #{File.basename($0)} PROCESSOR|FLOW [ --param=value | -p value | --param | -p]"
10
- end
11
-
12
- settings.description = <<-EOF
13
- wu-local is a tool for running Wukong processors and flows locally on
14
- the command-line. Use wu-local by passing it a processor and feeding
15
- in some data:
16
-
17
- $ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
18
- UNIX
19
- is
20
- Clever
21
- and
22
- Fun
23
-
24
- If your processors have named fields you can pass them in as
25
- arguments:
26
-
27
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
28
- UNIX
29
- Clever
30
-
31
- You can chain processors and calls to wu-local together:
32
-
33
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
34
- unix
35
- clever
36
-
37
- Which is a good way to develop a combined data flow which you can
38
- again test locally:
39
-
40
- $ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
41
- unix
42
- clever
43
- EOF
44
-
45
- settings.define :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
46
- # settings.define :tcp_server, description: "Run locally as a tcp server on a specified port", default: false, flag: 't'
47
- require 'wukong/boot' ; Wukong.boot!(settings)
48
-
49
- thing = settings.rest.first
50
- case
51
- when thing.nil?
52
- settings.dump_help
53
- exit(1)
54
- when Wukong.registry.registered?(thing.to_sym)
55
- processor = thing.to_sym
56
- when File.exist?(thing)
57
- load thing
58
- processor = settings.run || File.basename(thing, '.rb')
59
- else
60
- settings.dump_help
61
- exit(2)
62
- end
63
-
64
-
65
-
66
- begin
67
- # EM.run do
68
- # settings.tcp_server ? Wu::TCPServer.start(processor.to_sym, settings) : Wu::StdioServer.start(processor.to_sym, settings)
69
- # end
70
- StupidServer.new(processor.to_sym, settings).run!
71
- rescue Wu::Error => e
72
- $stderr.puts e.message
73
- exit(3)
74
- end
75
-
76
- # One day, it will be this easy...
77
- # Wukong::LocalRunner.run!
4
+ Wukong::Local::LocalRunner.run
@@ -0,0 +1,39 @@
1
+ digraph WuLocalControlFlow {
2
+ size ="100,100";
3
+ stdin [label=<
4
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
5
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">STDIN</FONT></TD></TR>
6
+ <TR><TD><FONT POINT-SIZE="10">The line of input text</FONT></TD></TR>
7
+ <TR><TD><FONT FACE="MONOSPACE">Shall I compare thee to a summers day?<BR/>Thou art more lovely and more temperate<BR/>...</FONT></TD></TR>
8
+
9
+ </TABLE>>];
10
+ command [shape=diamond,label=<
11
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
12
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Command</FONT></TD></TR>
13
+ <TR><TD><FONT POINT-SIZE="10">A UNIX process launched on the command line</FONT></TD></TR>
14
+ <TR><TD><FONT FACE="MONOSPACE">wu-local word_counter</FONT></TD></TR>
15
+ </TABLE>>];
16
+ runner [shape=box,label=<
17
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
18
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Runner</FONT></TD></TR>
19
+ <TR><TD><FONT POINT-SIZE="10">Loads plugins and code, configures and resolves settings, boots plugins, validates command line, then runs.</FONT></TD></TR>
20
+ <TR><TD><FONT FACE="MONOSPACE">Wukong::Local::LocalRunner</FONT></TD></TR>
21
+ </TABLE>>];
22
+ driver [shape=box,label=<
23
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
24
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Driver</FONT></TD></TR>
25
+ <TR><TD><FONT POINT-SIZE="10">Passes input to processor, handles output</FONT></TD></TR>
26
+ <TR><TD><FONT FACE="MONOSPACE">Wukong::Local::StdioDriver</FONT></TD></TR>
27
+ </TABLE>>];
28
+ stdout [label=<
29
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
30
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">STDOUT</FONT></TD></TR>
31
+ <TR><TD><FONT POINT-SIZE="10">The resulting output lines</FONT></TD></TR>
32
+ <TR><TD><FONT FACE="MONOSPACE">8<BR/>7<BR/>...</FONT></TD></TR>
33
+ </TABLE>>];
34
+
35
+ command -> runner [label="Implemented By"];
36
+ runner -> driver [label="Instantiates"];
37
+ stdin -> driver [label="Reads 1 Line"];
38
+ driver -> stdout [label="Writes N Lines"];
39
+ }
Binary file
@@ -0,0 +1,2 @@
1
+ # Just a file to test that we can correctly load Ruby files from the
2
+ # command-line.
@@ -0,0 +1,7 @@
1
+ # A simple processor in its own little file.
2
+ class StringReverser < Wukong::Processor
3
+ def process line
4
+ yield line.reverse
5
+ end
6
+ register(:string_reverser)
7
+ end
@@ -25,7 +25,7 @@ module Hanuman
25
25
  include Gorillib::Model
26
26
  extend StageClassMethods
27
27
 
28
- field :label, Symbol
28
+ field :label, Symbol, :doc => false
29
29
  end
30
30
 
31
31
  class StageBuilder
@@ -43,7 +43,7 @@ module Hanuman
43
43
  end
44
44
 
45
45
  def build(options = {})
46
- for_class.receive self.serialize.merge(options)
46
+ for_class.receive self.serialize.merge(options).merge(options[label] || {})
47
47
  end
48
48
 
49
49
  def handle_extra_attributes(attrs)
@@ -4,16 +4,14 @@ require 'multi_json'
4
4
  require 'eventmachine'
5
5
  require 'log4r'
6
6
 
7
- require 'hanuman'
7
+ require_relative 'hanuman'
8
+ require_relative 'wukong/logger'
9
+ require_relative 'wukong/processor'
10
+ require_relative 'wukong/dataflow'
11
+ require_relative 'wukong/plugin'
12
+ require_relative 'wukong/driver'
13
+ require_relative 'wukong/runner'
8
14
 
9
- require 'wukong/logger'
10
- require 'wukong/processor'
11
- require 'wukong/dataflow'
12
- require 'wukong/configuration'
13
- require 'wukong/widgets'
14
- require 'wukong/driver'
15
- require 'wukong/server'
16
- # require 'wukong/runner'
17
15
 
18
16
  # The Wukong module will contain all code for Wukong's core (like
19
17
  # Processors and Dataflows) as well as all plugins.
@@ -25,7 +23,15 @@ module Wukong
25
23
 
26
24
  # A common error class intended to be raised by code within Wukong
27
25
  # or its plugins.
28
- Error = Class.new(StandardError)
26
+ class Error < StandardError
27
+ def initialize msg_or_error
28
+ if msg_or_error.respond_to?(:message) && msg_or_error.respond_to?(:backtrace)
29
+ super([msg_or_error.message, msg_or_error.backtrace].compact.join("\n"))
30
+ else
31
+ super(msg_or_error)
32
+ end
33
+ end
34
+ end
29
35
 
30
36
  add_shortcut_method_for(:processor, ProcessorBuilder)
31
37
  add_shortcut_method_for(:dataflow, DataflowBuilder)
@@ -34,3 +40,8 @@ end
34
40
 
35
41
  # Alias module name for shorter namespaces
36
42
  Wu = Wukong
43
+
44
+ require_relative 'wukong/widgets'
45
+ require_relative 'wukong/local'
46
+
47
+
@@ -1,11 +1,8 @@
1
1
  module Wukong
2
2
  class DataflowBuilder < Hanuman::GraphBuilder
3
3
 
4
- def describe desc
5
- @description = desc
6
- end
7
-
8
- def description
4
+ def description desc=nil
5
+ @description = desc if desc
9
6
  @description
10
7
  end
11
8
 
@@ -0,0 +1,14 @@
1
+ require 'yard'
2
+ require 'wukong'
3
+ require_relative('doc_helpers/processor_handler')
4
+ require_relative('doc_helpers/dataflow_handler')
5
+ require_relative('doc_helpers/field_handler')
6
+
7
+ module Wukong
8
+
9
+ # This module defines several YARD handlers for processing inline
10
+ # documentation written around Wukong processors and dataflows and
11
+ # Gorillib fields when using the DSL.
12
+ module DocHelpers
13
+ end
14
+ end
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the Wukong.dataflow syntax.
5
+ class DataflowHandler < YARD::Handlers::Ruby::ClassHandler
6
+
7
+ handles method_call(:dataflow)
8
+
9
+ # :nodoc:
10
+ def base_dataflow_class
11
+ @base_dataflow_class ||= YARD::CodeObjects::ClassObject.new(namespace, "Wukong::Dataflow")
12
+ end
13
+
14
+ # :nodoc:
15
+ def process
16
+ dataflow_name = statement.parameters.first.jump(:tstring_content, :ident).source
17
+ class_name = Gorillib::Inflector.camelize(dataflow_name)
18
+ dataflow_class = create_class(class_name, base_dataflow_class)
19
+ dataflow_body = statement.last.last
20
+
21
+ push_state(:owner => dataflow_class, :scope => :class, :namespace => dataflow_class) do
22
+ parse_block(dataflow_body)
23
+ end
24
+ end
25
+
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,91 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the syntax
5
+ #
6
+ # class Foo
7
+ # include Gorillib::Model
8
+ # field :bar, Integer, :default => 3
9
+ # end
10
+ class FieldHandler < YARD::Handlers::Ruby::ClassHandler
11
+
12
+ handles method_call(:field)
13
+ namespace_only
14
+
15
+ def process
16
+ register(getter)
17
+ register(setter)
18
+ namespace.attributes[:instance][field_name] = { :read => getter, :write => setter }
19
+ end
20
+
21
+ def getter
22
+ @getter ||= YARD::CodeObjects::MethodObject.new(namespace, field_name, :instance).tap do |method|
23
+ method.docstring = getter_docstring
24
+ end
25
+ end
26
+
27
+ def setter
28
+ @setter ||= YARD::CodeObjects::MethodObject.new(namespace, field_name + '=', :instance).tap do |method|
29
+ method.docstring = setter_docstring
30
+ end
31
+ end
32
+
33
+ def getter_docstring
34
+ doc = "@return [#{field_type}]"
35
+ doc += " #{field_doc}" if field_doc
36
+ doc += " [Default: #{field_default}]" if field_default
37
+ doc
38
+ end
39
+
40
+ def setter_docstring
41
+ doc = "@return [#{field_type}]"
42
+ doc += " #{field_doc}" if field_doc
43
+ doc += " [Default: #{field_default}]" if field_default
44
+ doc
45
+ end
46
+
47
+ def field_name
48
+ statement.parameters.first.jump(:tstring_content, :ident).source
49
+ end
50
+
51
+ def field_type
52
+ statement.parameters[1].jump(:string_content, :ident).source
53
+ end
54
+
55
+ def field_options
56
+ return @field_options if @field_options
57
+ @field_options = {}
58
+ field_options_obj = statement.parameters[2]
59
+ if field_options_obj
60
+ keys_and_values = field_options_obj.jump(:assoc)
61
+ until keys_and_values.empty?
62
+ obj = keys_and_values.shift
63
+ if obj.type == :symbol_literal
64
+ key = obj.source.to_s.gsub(/^:/,'').to_sym
65
+ value_obj = keys_and_values.shift
66
+ if value_obj
67
+ value = case key
68
+ when :doc then value_obj.source.to_s.gsub(/^"/,'').gsub(/"$/,'')
69
+ else
70
+ value_obj.source
71
+ end
72
+ @field_options[key] = value
73
+ end
74
+ end
75
+ end
76
+ end
77
+ @field_options
78
+ end
79
+
80
+ def field_doc
81
+ field_options[:doc]
82
+ end
83
+
84
+ def field_default
85
+ field_options[:default]
86
+ end
87
+
88
+ end
89
+ end
90
+ end
91
+
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the Wukong.processor syntax.
5
+ class ProcessorHandler < YARD::Handlers::Ruby::ClassHandler
6
+
7
+ handles method_call(:processor)
8
+
9
+ # :nodoc:
10
+ def base_processor_class
11
+ @base_processor_class ||= YARD::CodeObjects::ClassObject.new(namespace, "Wukong::Processor")
12
+ end
13
+
14
+ # :nodoc:
15
+ def process
16
+ processor_name = statement.parameters.first.jump(:tstring_content, :ident).source
17
+ class_name = Gorillib::Inflector.camelize(processor_name)
18
+ processor_class = create_class(class_name, base_processor_class)
19
+ processor_body = statement.last.last
20
+
21
+ push_state(:owner => processor_class, :scope => :class, :namespace => processor_class) do
22
+ parse_block(processor_body)
23
+ end
24
+ end
25
+
26
+ end
27
+ end
28
+ end
29
+
@@ -1,12 +1,16 @@
1
1
  module Wukong
2
2
  module DriverMethods
3
3
 
4
+ attr_accessor :dataflow
5
+
6
+ attr_accessor :settings
7
+
4
8
  def driver
5
9
  @driver ||= Driver.new(dataflow)
6
10
  end
7
11
 
8
12
  def lookup(label)
9
- raise Wukong::Error.new("could not find definition for #{label}") unless Wukong.registry.registered?(label.to_sym)
13
+ raise Wukong::Error.new("could not find definition for <#{label}>") unless Wukong.registry.registered?(label.to_sym)
10
14
  Wukong.registry.retrieve(label.to_sym)
11
15
  end
12
16
 
@@ -29,6 +33,12 @@ module Wukong
29
33
  dataflow.each(&:setup)
30
34
  end
31
35
 
36
+ def finalize_dataflow
37
+ dataflow.each do |stage|
38
+ stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
39
+ end
40
+ end
41
+
32
42
  def finalize_and_stop_dataflow
33
43
  dataflow.each do |stage|
34
44
  stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
@@ -0,0 +1,40 @@
1
+ module Wukong
2
+
3
+ # Provides methods for supporting the running of Wukong processors
4
+ # and dataflows entirely locally, without any frameworks like Hadoop
5
+ # or Storm.
6
+ #
7
+ # This module is actually a plugin for Wukong.
8
+ module Local
9
+ include Plugin
10
+
11
+ # Configures the given +settings+ object with all settings
12
+ # specific to Wukong::Local for the given +program+ name.
13
+ #
14
+ # @param [Configliere::Param] settings the settings to configure
15
+ # @param [String] program the name of the currently executing program
16
+ def self.configure settings, program
17
+ case program
18
+ when 'wu-local'
19
+ settings.define :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
20
+ settings.define :tcp_port, description: "Consume TCP requests on the given port instead of lines over STDIN", type: Integer, flag: 't'
21
+
22
+ settings.define :from, description: "Parse input from given data format (json, tsv, &c.) before processing"
23
+ settings.define :to, description: "Convert input to given data format (json, tsv, &c.) before emitting"
24
+
25
+ settings.define :consumes, description: "Parse input as instances of given model class before processing", type: Class
26
+ end
27
+ end
28
+
29
+ # Boots Wukong::Local using the given +settings+ at the given
30
+ # +root+.
31
+ #
32
+ # @param [Configliere::Param] settings the settings to use to boot
33
+ # @param [String] root the root directory to boot in
34
+ def self.boot(settings, root)
35
+ end
36
+
37
+ end
38
+ end
39
+
40
+ require_relative('local/runner')