wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -1,144 +1,167 @@
1
+ require_relative("runner/code_loader")
2
+ require_relative("runner/deploy_pack_loader")
3
+ require_relative("runner/boot_sequence")
4
+
1
5
  module Wukong
2
- module CommandlineRunner
3
-
4
- def exit_with_status(status, options = {})
5
- warn options[:msg] if options[:msg]
6
- @env.dump_help if options[:show_help]
7
- exit(status)
8
- end
9
-
10
- def env= settings
11
- @env = settings
12
- end
13
6
 
14
- def self.included(base)
15
- base.extend(ClassMethods)
7
+ # A base class which handles
8
+ #
9
+ # * requiring any necessary code like deploy packs or code from command-line arguments
10
+ # * having all plugins configure settings as necessary
11
+ # * resolving settings
12
+ # * having all plugins boot from the now-resolved settings
13
+ # * parsing command-line arguments
14
+ # * instantiating and handing over control to a driver which runs the actual code
15
+ class Runner
16
+
17
+ include Logging
18
+ include CodeLoader
19
+ include DeployPackLoader
20
+ include BootSequence
21
+
22
+ # The settings object that will be configured and booted from.
23
+ # All plugins will configure this object.
24
+ attr_accessor :settings
25
+
26
+ # Create a new Runner with the given +settings+.
27
+ #
28
+ # Uses an empty Configliere::Param object if no +settings+ are
29
+ # given.
30
+ #
31
+ # @param [Configliere::Param] settings
32
+ def initialize settings=Configliere::Param.new
33
+ self.settings = settings
16
34
  end
17
35
 
18
- module ClassMethods
19
-
20
- def usage(usg = nil)
21
- return @usage if usg.nil?
22
- @usage = usg
36
+ # Instantiates a new Runner and boots it up.
37
+ #
38
+ # Will rescue any Wukong::Error with a logged error message and
39
+ # exit.
40
+ def self.run(settings=Configliere::Param.new)
41
+ begin
42
+ new(settings).boot!
43
+ rescue Wukong::Error => e
44
+ die(e.message, 127)
23
45
  end
46
+ end
24
47
 
25
- def desc(dsc = nil)
26
- return @description if dsc.nil?
27
- @decription = desc
28
- end
48
+ # The parsed command-line arguments.
49
+ #
50
+ # Will raise an error if +boot+ hasn't been called yet.
51
+ #
52
+ # @return [Array<String>]
53
+ def args
54
+ settings.rest
55
+ end
29
56
 
30
- def add_param(*args)
31
- defined_params << args
32
- end
33
-
34
- def defined_params
35
- @defined_params ||= []
36
- end
57
+ # The root directory we should consider ourselves to be running
58
+ # in.
59
+ #
60
+ # Defaults to the root directory of a deploy pack if we're running
61
+ # inside one, else just returns `Dir.pwd`.
62
+ #
63
+ # @return [String]
64
+ def root
65
+ in_deploy_pack? ? deploy_pack_dir : Dir.pwd
66
+ end
37
67
 
38
- def base_config(conf = nil)
39
- return @base_configuration if conf.nil?
40
- @base_configuration = conf
41
- end
68
+ # Convenience method for setting the usage message of a Runner.
69
+ #
70
+ # @param [String, nil] msg set the usage message
71
+ # @return [String] the usage message
72
+ def self.usage msg=nil
73
+ return @usage unless msg
74
+ @usage = msg
75
+ end
76
+
77
+ # Convenience method for setting the description message of a Runner.
78
+ #
79
+ # @param [String, nil] msg set the description message
80
+ # @return [String] the description message
81
+ def self.description msg=nil
82
+ return @description unless msg
83
+ @description = msg
84
+ end
42
85
 
43
- def decorate_environment! env
44
- usg = self.usage
45
- env.define_singleton_method(:usage){ usg }
46
- env.description = self.desc
47
- defined_params.each{ |params| env.send(:define, *params) }
48
- end
86
+ # Kill this process with the given error `message` and exit
87
+ # `code`.
88
+ #
89
+ # @param [String] message
90
+ # @param [Integer] code
91
+ def self.die(message=nil, code=127)
92
+ log.error(message) if message
93
+ exit(code)
94
+ end
49
95
 
50
- def in_deploy_pack?
51
- return @in_deploy_pack unless @in_deploy_pack.nil?
52
- @in_deploy_pack = (find_deploy_pack_dir != '/')
53
- end
96
+ # Return the name of the program this Runner is running.
97
+ #
98
+ # This is passed to plugins which can configure settings
99
+ # appropriately. Defaults to the name of the currently running
100
+ # process.
101
+ #
102
+ # @return [String]
103
+ def program_name
104
+ @program_name || File.basename($0)
105
+ end
54
106
 
55
- def find_deploy_pack_dir
56
- return @deploy_pack_dir if @deploy_pack_dir
57
- wd = Dir.pwd
58
- parent = File.dirname(wd)
59
- until wd == parent
60
- return wd if File.exist?(File.join(wd, 'Gemfile')) && File.exist?(File.join(wd, 'config', 'environment.rb'))
61
- wd = parent
62
- parent = File.dirname(wd)
63
- end
64
- @deploy_pack_dir = wd
65
- end
107
+ # Explicitly set the name of the program this Runner is running.
108
+ #
109
+ # This is useful for unit tests in which the name of the currently
110
+ # running process may be different from the runner command being
111
+ # tested (`rspec` vs. `wu-local`).
112
+ #
113
+ # @param [String] name
114
+ def program_name= name
115
+ @program_name = name
116
+ end
66
117
 
67
- def run!(*run_params)
68
- settings = base_configuration || Configliere::Param.use(:commandline)
69
- boot_environment(settings) if in_deploy_pack?
70
- runner = new(*run_params)
71
- runner.env = settings.resolve!
72
- runner.run(*settings.rest)
73
- end
74
-
75
- end
76
- end
77
-
78
- class LocalRunner
79
- include CommandlineRunner
80
- base_configuration
81
-
82
- usage 'usage: wu-local PROCESSOR|FLOW [ --param=value | -p value | --param | -p]'
83
- desc <<EOF
84
- wu-local is a tool for running Wukong processors and flows locally on
85
- the command-line. Use wu-local by passing it a processor and feeding
86
- in some data:
87
-
88
- $ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
89
- UNIX
90
- is
91
- Clever
92
- and
93
- Fun
94
-
95
- If your processors have named fields you can pass them in as
96
- arguments:
97
-
98
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
99
- UNIX
100
- Clever
101
-
102
- You can chain processors and calls to wu-local together:
103
-
104
- $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
105
- unix
106
- clever
107
-
108
- Which is a good way to develop a combined data flow which you can
109
- again test locally:
110
-
111
- $ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
112
- unix
113
- clever
114
- EOF
115
-
116
- add_param :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
117
- add_param :tcp_server, description: "Run locally as a server using provided TCP port", default: false, flag: 't'
118
-
119
- def run *args
120
- arg = args.first
121
- case
122
- when arg.nil?
123
- exit_with_status(1, show_help: true, msg: "Must pass a processor name or path to a processor file. Got <#{arg}>")
124
- when Wukong.registry.registered?(arg.to_sym)
125
- processor = arg.to_sym
126
- when File.exist?(arg)
127
- load arg
128
- processor = @env.run || File.basename(arg, '.rb')
129
- else
130
- exit_with_status(2, show_help: true, msg: "Must pass a processor name or path to a processor file. Got <#{arg}>")
131
- end
132
- run_em_server(processor, @env)
118
+ # Return the usage message for this runner.
119
+ #
120
+ # @return [String] the usage message
121
+ def usage
122
+ ["usage: #{program_name} [ --param=val | --param | -p val | -p ]", self.class.usage].compact.join(' ')
133
123
  end
134
-
135
- def run_em_server(processor, env)
136
- EM.run do
137
- env.tcp_server ? Wu::TCPServer.start(processor, env) : Wu::StdioServer.start(processor, env)
138
- end
139
- rescue Wu::Error => e
140
- exit_with_status(3, msg: e.backtrace.join("\n"))
124
+
125
+ # Return the description text for this runner.
126
+ #
127
+ # @return [String] the description text
128
+ def description
129
+ self.class.description
130
+ end
131
+
132
+ # Is there a processor or dataflow registered with the given
133
+ # `name`?
134
+ #
135
+ # @param [String] name
136
+ # @return [true, false]
137
+ def registered? name
138
+ name && Wukong.registry.registered?(name.to_sym)
139
+ end
140
+
141
+ # Retrieve the class of the processor or dataflow registered under a given `name`.
142
+ #
143
+ # @param [String,Symbol] name
144
+ # @return [Wukong::Processor, Wukong::Dataflow, nil]
145
+ def dataflow_class_for(name)
146
+ builder = (Wukong.registry.retrieve(name.to_sym) or return)
147
+ builder.for_class
148
+ end
149
+
150
+ # Is the given `name` registered as a processor?
151
+ #
152
+ # @param [String,Symbol] name
153
+ # @return [true, false]
154
+ def processor?(name)
155
+ registered?(name) && dataflow_class_for(name).ancestors.include?(Wukong::Processor)
156
+ end
157
+
158
+ # Is the given `name` registered as a dataflow?
159
+ #
160
+ # @param [String,Symbol] name
161
+ # @return [true, false]
162
+ def dataflow?(name)
163
+ registered?(name) && dataflow_class_for(name).ancestors.include?(Wukong::Dataflow)
141
164
  end
142
165
 
143
- end
166
+ end
144
167
  end
@@ -0,0 +1,123 @@
1
+ require_relative('help_message')
2
+ module Wukong
3
+ class Runner
4
+
5
+ # The boot sequence of a runner consists of the following phases,
6
+ # each corresponding to a method provided by this module.
7
+ #
8
+ # * #load -- loads all application code
9
+ # * #configure -- configures settings from core Wukong, any loaded plugins, and any application code
10
+ # * #resolve -- resolves settings
11
+ # * #setup -- boots core Wukong and all loaded plugins
12
+ # * #validate -- validates command-line args
13
+ # * #run -- starts the runner running
14
+ #
15
+ # Each method can be separately overridden, allowing for a lot of
16
+ # customizability for different kinds of runners.
17
+ module BootSequence
18
+
19
+ include HelpMessage
20
+
21
+ # Boot this Runner, calling in order:
22
+ #
23
+ # * #load
24
+ # * #configure
25
+ # * #resolve
26
+ # * #setup
27
+ # * #validate
28
+ # * #run or #die
29
+ #
30
+ # If `override_settings` is passed then merge it over the
31
+ # Runner's usual settings (this is useful for unit tests where
32
+ # settings are injected in ways different from the usual
33
+ # workflow).
34
+ #
35
+ # @param [Configliere::Param] override_settings
36
+ def boot!(override_settings=nil)
37
+ load
38
+ configure
39
+ resolve
40
+ setup
41
+ settings.merge!(override_settings) if override_settings
42
+
43
+ case
44
+ when help_given? then dump_help_and_exit!
45
+ when validate then run
46
+ else
47
+ die("Invalid arguments")
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ # Loads all code necessary for this Runner to perform, including:
54
+ #
55
+ # * any code associated with being inside of a deploy pack
56
+ # * any code passed in as (unknown rest) arguments on the command-line
57
+ def load
58
+ load_deploy_pack
59
+ load_args
60
+ end
61
+
62
+ # Endows the settings with everything it needs, including usage,
63
+ # description, and any define's provided by this Runner class or
64
+ # any plugins.
65
+ def configure
66
+ settings.use(:commandline)
67
+ settings.description = description if description
68
+ u = usage
69
+ settings.define_singleton_method(:usage){ u } if u
70
+ Wukong.configure_plugins(settings, program_name)
71
+ end
72
+
73
+ # Resolves the settings.
74
+ #
75
+ # Rescues some of the annoying RuntimeErrors thrown by
76
+ # Configliere...
77
+ def resolve
78
+ begin
79
+ strip_help_param!
80
+ settings.resolve!
81
+ true
82
+ rescue RuntimeError, SystemExit => e
83
+ raise Error.new(e)
84
+ end
85
+ end
86
+
87
+ # Performs any setup code necessary before run.
88
+ #
89
+ # Boots all plugins by default. If you override this code, make
90
+ # sure to either call `super` or boot plugins yourself.
91
+ def setup
92
+ Wukong.boot_plugins(settings, root)
93
+ end
94
+
95
+ # Validates the command-line args. Raise a Wukong::Error in
96
+ # this method to terminate execution with a specific or custom
97
+ # error.
98
+ #
99
+ # Return false-like to terminate with a generic argument error.
100
+ #
101
+ # @return [true, false]
102
+ def validate
103
+ true
104
+ end
105
+
106
+ # Run this runner.
107
+ #
108
+ # You'll want to override this method in your own Runner class.
109
+ def run
110
+ end
111
+
112
+ # Kill this runner with the given error `message` and exit
113
+ # `code`.
114
+ #
115
+ # @param [String] message
116
+ # @param [Integer] code
117
+ def die message=nil, code=126
118
+ self.class.die(message, code)
119
+ end
120
+
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,52 @@
1
+ module Wukong
2
+ class Runner
3
+
4
+ # Defines methods to help a Runner class load code passed in
5
+ # dynamically on the command-line.
6
+ #
7
+ # The default behavior of code in this module is to load any Ruby
8
+ # files (ending with `.rb`) passed in the command-line.
9
+ module CodeLoader
10
+
11
+ # Loads each Ruby file passed on the command line (deploy pack
12
+ # code is loaded separately, by #load_deploy_pack).
13
+ def load_args
14
+ (args_to_load || []).each do |path|
15
+ load_ruby_file(path)
16
+ end
17
+ end
18
+
19
+ private
20
+
21
+ # Paths to any additional code that we found out about on the
22
+ # command-line.
23
+ #
24
+ # @return [Array<String>] paths to load culled from the ARGV.
25
+ def args_to_load
26
+ ruby_file_args || []
27
+ end
28
+
29
+ # Returns all pre-resolved arguments which are Ruby files.
30
+ #
31
+ # @return [Array<String>]
32
+ def ruby_file_args
33
+ ARGV.find_all { |arg| arg.to_s =~ /\.rb$/ && arg.to_s !~ /^--/ }
34
+ end
35
+
36
+ # Loads a single Ruby file, capturing LoadError and SyntaxError
37
+ # and raising Wukong::Error instead (so it can be easily captured
38
+ # by the Runner).
39
+ #
40
+ # @param [String] path
41
+ # @raise [Wukong::Error] if there is an error
42
+ def load_ruby_file path
43
+ return unless path
44
+ begin
45
+ Kernel.load path
46
+ rescue LoadError, SyntaxError => e
47
+ raise Error.new(e)
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end