wukong 3.0.0.pre3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -1,77 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'wukong'
4
-
5
- settings = Wukong::Local::Configuration
6
- settings.use(:commandline)
7
-
8
- def settings.usage
9
- "usage: #{File.basename($0)} PROCESSOR|FLOW [ --param=value | -p value | --param | -p]"
10
- end
11
-
12
- settings.description = <<-EOF
13
- wu-local is a tool for running Wukong processors and flows locally on
14
- the command-line. Use wu-local by passing it a processor and feeding
15
- in some data:
16
-
17
- $ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
18
- UNIX
19
- is
20
- Clever
21
- and
22
- Fun
23
-
24
- If your processors have named fields you can pass them in as
25
- arguments:
26
-
27
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
28
- UNIX
29
- Clever
30
-
31
- You can chain processors and calls to wu-local together:
32
-
33
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
34
- unix
35
- clever
36
-
37
- Which is a good way to develop a combined data flow which you can
38
- again test locally:
39
-
40
- $ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
41
- unix
42
- clever
43
- EOF
44
-
45
- settings.define :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
46
- # settings.define :tcp_server, description: "Run locally as a tcp server on a specified port", default: false, flag: 't'
47
- require 'wukong/boot' ; Wukong.boot!(settings)
48
-
49
- thing = settings.rest.first
50
- case
51
- when thing.nil?
52
- settings.dump_help
53
- exit(1)
54
- when Wukong.registry.registered?(thing.to_sym)
55
- processor = thing.to_sym
56
- when File.exist?(thing)
57
- load thing
58
- processor = settings.run || File.basename(thing, '.rb')
59
- else
60
- settings.dump_help
61
- exit(2)
62
- end
63
-
64
-
65
-
66
- begin
67
- # EM.run do
68
- # settings.tcp_server ? Wu::TCPServer.start(processor.to_sym, settings) : Wu::StdioServer.start(processor.to_sym, settings)
69
- # end
70
- StupidServer.new(processor.to_sym, settings).run!
71
- rescue Wu::Error => e
72
- $stderr.puts e.message
73
- exit(3)
74
- end
75
-
76
- # One day, it will be this easy...
77
- # Wukong::LocalRunner.run!
4
+ Wukong::Local::LocalRunner.run
@@ -0,0 +1,39 @@
1
+ digraph WuLocalControlFlow {
2
+ size ="100,100";
3
+ stdin [label=<
4
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
5
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">STDIN</FONT></TD></TR>
6
+ <TR><TD><FONT POINT-SIZE="10">The line of input text</FONT></TD></TR>
7
+ <TR><TD><FONT FACE="MONOSPACE">Shall I compare thee to a summers day?<BR/>Thou art more lovely and more temperate<BR/>...</FONT></TD></TR>
8
+
9
+ </TABLE>>];
10
+ command [shape=diamond,label=<
11
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
12
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Command</FONT></TD></TR>
13
+ <TR><TD><FONT POINT-SIZE="10">A UNIX process launched on the command line</FONT></TD></TR>
14
+ <TR><TD><FONT FACE="MONOSPACE">wu-local word_counter</FONT></TD></TR>
15
+ </TABLE>>];
16
+ runner [shape=box,label=<
17
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
18
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Runner</FONT></TD></TR>
19
+ <TR><TD><FONT POINT-SIZE="10">Loads plugins and code, configures and resolves settings, boots plugins, validates command line, then runs.</FONT></TD></TR>
20
+ <TR><TD><FONT FACE="MONOSPACE">Wukong::Local::LocalRunner</FONT></TD></TR>
21
+ </TABLE>>];
22
+ driver [shape=box,label=<
23
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
24
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">Driver</FONT></TD></TR>
25
+ <TR><TD><FONT POINT-SIZE="10">Passes input to processor, handles output</FONT></TD></TR>
26
+ <TR><TD><FONT FACE="MONOSPACE">Wukong::Local::StdioDriver</FONT></TD></TR>
27
+ </TABLE>>];
28
+ stdout [label=<
29
+ <TABLE BORDER="0" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
30
+ <TR><TD><FONT FACE="BOLD" POINT-SIZE="20">STDOUT</FONT></TD></TR>
31
+ <TR><TD><FONT POINT-SIZE="10">The resulting output lines</FONT></TD></TR>
32
+ <TR><TD><FONT FACE="MONOSPACE">8<BR/>7<BR/>...</FONT></TD></TR>
33
+ </TABLE>>];
34
+
35
+ command -> runner [label="Implemented By"];
36
+ runner -> driver [label="Instantiates"];
37
+ stdin -> driver [label="Reads 1 Line"];
38
+ driver -> stdout [label="Writes N Lines"];
39
+ }
Binary file
@@ -0,0 +1,2 @@
1
+ # Just a file to test that we can correctly load Ruby files from the
2
+ # command-line.
@@ -0,0 +1,7 @@
1
+ # A simple processor in its own little file.
2
+ class StringReverser < Wukong::Processor
3
+ def process line
4
+ yield line.reverse
5
+ end
6
+ register(:string_reverser)
7
+ end
@@ -25,7 +25,7 @@ module Hanuman
25
25
  include Gorillib::Model
26
26
  extend StageClassMethods
27
27
 
28
- field :label, Symbol
28
+ field :label, Symbol, :doc => false
29
29
  end
30
30
 
31
31
  class StageBuilder
@@ -43,7 +43,7 @@ module Hanuman
43
43
  end
44
44
 
45
45
  def build(options = {})
46
- for_class.receive self.serialize.merge(options)
46
+ for_class.receive self.serialize.merge(options).merge(options[label] || {})
47
47
  end
48
48
 
49
49
  def handle_extra_attributes(attrs)
@@ -4,16 +4,14 @@ require 'multi_json'
4
4
  require 'eventmachine'
5
5
  require 'log4r'
6
6
 
7
- require 'hanuman'
7
+ require_relative 'hanuman'
8
+ require_relative 'wukong/logger'
9
+ require_relative 'wukong/processor'
10
+ require_relative 'wukong/dataflow'
11
+ require_relative 'wukong/plugin'
12
+ require_relative 'wukong/driver'
13
+ require_relative 'wukong/runner'
8
14
 
9
- require 'wukong/logger'
10
- require 'wukong/processor'
11
- require 'wukong/dataflow'
12
- require 'wukong/configuration'
13
- require 'wukong/widgets'
14
- require 'wukong/driver'
15
- require 'wukong/server'
16
- # require 'wukong/runner'
17
15
 
18
16
  # The Wukong module will contain all code for Wukong's core (like
19
17
  # Processors and Dataflows) as well as all plugins.
@@ -25,7 +23,15 @@ module Wukong
25
23
 
26
24
  # A common error class intended to be raised by code within Wukong
27
25
  # or its plugins.
28
- Error = Class.new(StandardError)
26
+ class Error < StandardError
27
+ def initialize msg_or_error
28
+ if msg_or_error.respond_to?(:message) && msg_or_error.respond_to?(:backtrace)
29
+ super([msg_or_error.message, msg_or_error.backtrace].compact.join("\n"))
30
+ else
31
+ super(msg_or_error)
32
+ end
33
+ end
34
+ end
29
35
 
30
36
  add_shortcut_method_for(:processor, ProcessorBuilder)
31
37
  add_shortcut_method_for(:dataflow, DataflowBuilder)
@@ -34,3 +40,8 @@ end
34
40
 
35
41
  # Alias module name for shorter namespaces
36
42
  Wu = Wukong
43
+
44
+ require_relative 'wukong/widgets'
45
+ require_relative 'wukong/local'
46
+
47
+
@@ -1,11 +1,8 @@
1
1
  module Wukong
2
2
  class DataflowBuilder < Hanuman::GraphBuilder
3
3
 
4
- def describe desc
5
- @description = desc
6
- end
7
-
8
- def description
4
+ def description desc=nil
5
+ @description = desc if desc
9
6
  @description
10
7
  end
11
8
 
@@ -0,0 +1,14 @@
1
+ require 'yard'
2
+ require 'wukong'
3
+ require_relative('doc_helpers/processor_handler')
4
+ require_relative('doc_helpers/dataflow_handler')
5
+ require_relative('doc_helpers/field_handler')
6
+
7
+ module Wukong
8
+
9
+ # This module defines several YARD handlers for processing inline
10
+ # documentation written around Wukong processors and dataflows and
11
+ # Gorillib fields when using the DSL.
12
+ module DocHelpers
13
+ end
14
+ end
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the Wukong.dataflow syntax.
5
+ class DataflowHandler < YARD::Handlers::Ruby::ClassHandler
6
+
7
+ handles method_call(:dataflow)
8
+
9
+ # :nodoc:
10
+ def base_dataflow_class
11
+ @base_dataflow_class ||= YARD::CodeObjects::ClassObject.new(namespace, "Wukong::Dataflow")
12
+ end
13
+
14
+ # :nodoc:
15
+ def process
16
+ dataflow_name = statement.parameters.first.jump(:tstring_content, :ident).source
17
+ class_name = Gorillib::Inflector.camelize(dataflow_name)
18
+ dataflow_class = create_class(class_name, base_dataflow_class)
19
+ dataflow_body = statement.last.last
20
+
21
+ push_state(:owner => dataflow_class, :scope => :class, :namespace => dataflow_class) do
22
+ parse_block(dataflow_body)
23
+ end
24
+ end
25
+
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,91 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the syntax
5
+ #
6
+ # class Foo
7
+ # include Gorillib::Model
8
+ # field :bar, Integer, :default => 3
9
+ # end
10
+ class FieldHandler < YARD::Handlers::Ruby::ClassHandler
11
+
12
+ handles method_call(:field)
13
+ namespace_only
14
+
15
+ def process
16
+ register(getter)
17
+ register(setter)
18
+ namespace.attributes[:instance][field_name] = { :read => getter, :write => setter }
19
+ end
20
+
21
+ def getter
22
+ @getter ||= YARD::CodeObjects::MethodObject.new(namespace, field_name, :instance).tap do |method|
23
+ method.docstring = getter_docstring
24
+ end
25
+ end
26
+
27
+ def setter
28
+ @setter ||= YARD::CodeObjects::MethodObject.new(namespace, field_name + '=', :instance).tap do |method|
29
+ method.docstring = setter_docstring
30
+ end
31
+ end
32
+
33
+ def getter_docstring
34
+ doc = "@return [#{field_type}]"
35
+ doc += " #{field_doc}" if field_doc
36
+ doc += " [Default: #{field_default}]" if field_default
37
+ doc
38
+ end
39
+
40
+ def setter_docstring
41
+ doc = "@return [#{field_type}]"
42
+ doc += " #{field_doc}" if field_doc
43
+ doc += " [Default: #{field_default}]" if field_default
44
+ doc
45
+ end
46
+
47
+ def field_name
48
+ statement.parameters.first.jump(:tstring_content, :ident).source
49
+ end
50
+
51
+ def field_type
52
+ statement.parameters[1].jump(:string_content, :ident).source
53
+ end
54
+
55
+ def field_options
56
+ return @field_options if @field_options
57
+ @field_options = {}
58
+ field_options_obj = statement.parameters[2]
59
+ if field_options_obj
60
+ keys_and_values = field_options_obj.jump(:assoc)
61
+ until keys_and_values.empty?
62
+ obj = keys_and_values.shift
63
+ if obj.type == :symbol_literal
64
+ key = obj.source.to_s.gsub(/^:/,'').to_sym
65
+ value_obj = keys_and_values.shift
66
+ if value_obj
67
+ value = case key
68
+ when :doc then value_obj.source.to_s.gsub(/^"/,'').gsub(/"$/,'')
69
+ else
70
+ value_obj.source
71
+ end
72
+ @field_options[key] = value
73
+ end
74
+ end
75
+ end
76
+ end
77
+ @field_options
78
+ end
79
+
80
+ def field_doc
81
+ field_options[:doc]
82
+ end
83
+
84
+ def field_default
85
+ field_options[:default]
86
+ end
87
+
88
+ end
89
+ end
90
+ end
91
+
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module DocHelpers
3
+
4
+ # Handles the Wukong.processor syntax.
5
+ class ProcessorHandler < YARD::Handlers::Ruby::ClassHandler
6
+
7
+ handles method_call(:processor)
8
+
9
+ # :nodoc:
10
+ def base_processor_class
11
+ @base_processor_class ||= YARD::CodeObjects::ClassObject.new(namespace, "Wukong::Processor")
12
+ end
13
+
14
+ # :nodoc:
15
+ def process
16
+ processor_name = statement.parameters.first.jump(:tstring_content, :ident).source
17
+ class_name = Gorillib::Inflector.camelize(processor_name)
18
+ processor_class = create_class(class_name, base_processor_class)
19
+ processor_body = statement.last.last
20
+
21
+ push_state(:owner => processor_class, :scope => :class, :namespace => processor_class) do
22
+ parse_block(processor_body)
23
+ end
24
+ end
25
+
26
+ end
27
+ end
28
+ end
29
+
@@ -1,12 +1,16 @@
1
1
  module Wukong
2
2
  module DriverMethods
3
3
 
4
+ attr_accessor :dataflow
5
+
6
+ attr_accessor :settings
7
+
4
8
  def driver
5
9
  @driver ||= Driver.new(dataflow)
6
10
  end
7
11
 
8
12
  def lookup(label)
9
- raise Wukong::Error.new("could not find definition for #{label}") unless Wukong.registry.registered?(label.to_sym)
13
+ raise Wukong::Error.new("could not find definition for <#{label}>") unless Wukong.registry.registered?(label.to_sym)
10
14
  Wukong.registry.retrieve(label.to_sym)
11
15
  end
12
16
 
@@ -29,6 +33,12 @@ module Wukong
29
33
  dataflow.each(&:setup)
30
34
  end
31
35
 
36
+ def finalize_dataflow
37
+ dataflow.each do |stage|
38
+ stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
39
+ end
40
+ end
41
+
32
42
  def finalize_and_stop_dataflow
33
43
  dataflow.each do |stage|
34
44
  stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
@@ -0,0 +1,40 @@
1
+ module Wukong
2
+
3
+ # Provides methods for supporting the running of Wukong processors
4
+ # and dataflows entirely locally, without any frameworks like Hadoop
5
+ # or Storm.
6
+ #
7
+ # This module is actually a plugin for Wukong.
8
+ module Local
9
+ include Plugin
10
+
11
+ # Configures the given +settings+ object with all settings
12
+ # specific to Wukong::Local for the given +program+.
13
+ #
14
+ # @param [Configliere::Param] settings the settings to configure
15
+ # @param [String] program the name of the currently executing program
16
+ def self.configure settings, program
17
+ case program
18
+ when 'wu-local'
19
+ settings.define :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
20
+ settings.define :tcp_port, description: "Consume TCP requests on the given port instead of lines over STDIN", type: Integer, flag: 't'
21
+
22
+ settings.define :from, description: "Parse input from given data format (json, tsv, &c.) before processing"
23
+ settings.define :to, description: "Convert input to given data format (json, tsv, &c.) before emitting"
24
+
25
+ settings.define :consumes, description: "Parse input as instances of given model class before processing", type: Class
26
+ end
27
+ end
28
+
29
+ # Boots Wukong::Local using the given +settings+ at the given
30
+ # +root+.
31
+ #
32
+ # @param [Configliere::Param] settings the settings to use to boot
33
+ # @param [String] root the root directory to boot in
34
+ def self.boot(settings, root)
35
+ end
36
+
37
+ end
38
+ end
39
+
40
+ require_relative('local/runner')