wukong 3.0.0.pre3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -1,119 +0,0 @@
1
- module Wukong
2
- module EventMachineServer
3
- include DriverMethods
4
-
5
- def self.included klass
6
- klass.class_eval do
7
- attr_accessor :dataflow, :settings
8
-
9
- def self.add_signal_traps
10
- Signal.trap('INT') { log.info 'Received SIGINT. Stopping.' ; EM.stop }
11
- Signal.trap('TERM') { log.info 'Received SIGTERM. Stopping.' ; EM.stop }
12
- end
13
- end
14
- end
15
-
16
- def initialize(label, settings)
17
- super
18
- @settings = settings
19
- @dataflow = construct_dataflow(label, settings)
20
- end
21
-
22
- end
23
-
24
- class StdioServer < EM::P::LineAndTextProtocol
25
- include EventMachineServer
26
- include Processor::StdoutProcessor
27
- include Logging
28
-
29
- def self.start(label, settings = {})
30
- EM.attach($stdin, self, label, settings)
31
- end
32
-
33
- def post_init
34
- self.class.add_signal_traps
35
- setup_dataflow
36
- end
37
-
38
- def receive_line line
39
- driver.send_through_dataflow(line)
40
- end
41
-
42
- def unbind
43
- finalize_and_stop_dataflow
44
- EM.stop
45
- end
46
-
47
- end
48
-
49
- class TCPServer < EM::P::LineAndTextProtocol
50
- include EventMachineServer
51
- include Processor::BufferedProcessor
52
- include Logging
53
-
54
- def self.start(label, settings = {})
55
- host = settings[:host] || Socket.gethostname
56
- port = settings[:port] || 9000
57
- EM.start_server(host, port, self, label, settings)
58
- log.info "Server started on #{host} on port #{port}"
59
- add_signal_traps
60
- end
61
-
62
- def post_init
63
- port, ip = Socket.unpack_sockaddr_in(get_peername)
64
- log.info "Connected to #{ip} on #{port}"
65
- setup_dataflow
66
- end
67
-
68
- def receive_line line
69
- @buffer = []
70
- operation = proc { driver.send_through_dataflow(line) }
71
- callback = proc { flush_buffer @buffer }
72
- EM.defer(operation, callback)
73
- end
74
-
75
- def flush_buffer records
76
- send_data(records.join("\n") + "\n")
77
- records.clear
78
- end
79
-
80
- def unbind
81
- EM.stop
82
- end
83
-
84
- end
85
- end
86
-
87
- class StupidServer
88
-
89
- attr_accessor :dataflow, :settings
90
-
91
- def initialize(label, settings)
92
- @settings = settings
93
- builder = (Wukong.registry.retrieve(label.to_sym) or raise Wukong::Error.new("No such processor or dataflow: #{label}"))
94
- dataflow = builder.build(settings)
95
- @dataflow = dataflow.respond_to?(:stages) ? dataflow.directed_sort.map{ |name| dataflow.stages[name] } : [ dataflow ]
96
- @dataflow << self
97
- end
98
-
99
- def dumb_driver
100
- @dumb_driver ||= Wukong::Driver.new(dataflow)
101
- end
102
-
103
- def stop() ; end
104
- def setup() ; end
105
- def process(record) $stdout.puts record ; end
106
-
107
- def run!
108
- dataflow.each(&:setup)
109
-
110
- while line = $stdin.readline.chomp rescue nil do
111
- dumb_driver.send_through_dataflow(line)
112
- end
113
- dataflow.each do |stage|
114
- stage.finalize(&dumb_driver.advance(stage)) if stage.respond_to?(:finalize)
115
- stage.stop
116
- end
117
-
118
- end
119
- end
@@ -1,157 +0,0 @@
1
- require 'open3'
2
-
3
- module Wukong
4
- module SpecHelpers
5
-
6
- # Provides a `command` method for writing integration tests for
7
- # commands.
8
- module IntegrationRunner
9
-
10
- # Spawn a command and capture its STDOUT, STDERR, and exit code.
11
- #
12
- # The `args` will be joined together into a command line.
13
- #
14
- # It is expected that you will use the matchers defined in
15
- # IntegrationMatchers in your integration tests:
16
- #
17
- # @example Check output of 'ls' includes a string 'foo.txt'
18
- # it "lists files" do
19
- # command('ls').should have_output('foo.txt')
20
- # end
21
- #
22
- # @example More complicated
23
- # context "long format" do
24
- # it "lists files with timestamps" do
25
- # command('ls', '-l').should have_output('foo.txt', /\w+ \d+ \d+:\d+/)
26
- # end
27
- # end
28
- #
29
- # @param [Array<String>] args
30
- #
31
- # @overload command(*args, options={})
32
- # If the last element of `args` is a Hash it will be used for
33
- # options.
34
- #
35
- # The :env option specifies the command line environment to
36
- # use for the command. By default this will be the value of
37
- # the Ruby process's own `ENV` variable. If running in a
38
- # context in which the `integration_env` method is defined,
39
- # its return value will be merged on top of `ENV`. An
40
- # explicitly provided :env option will again be merged on top.
41
- #
42
- # The :cwd option specifies the working directory to start in.
43
- # It defaults to the value of <tt>Dir.pwd</tt>
44
- #
45
- # @param [Array<String>] args
46
- # @param [Hash] options
47
- # @option options [Hash] env the shell environment to spawn the command with
48
- # @option options [Hash] cwd the directory to execute the command in
49
- def command *args
50
- a = args.flatten.compact
51
- options = (a.last.is_a?(Hash) ? a.pop : {})
52
-
53
- env = ENV.to_hash.dup
54
- env.merge!(integration_env) if respond_to?(:integration_env)
55
- env.merge!(options[:env] || {})
56
-
57
- cwd = options[:cwd]
58
- cwd ||= (respond_to?(:integration_cwd) ? integration_cwd : Dir.pwd)
59
-
60
- IntegrationDriver.new(a, cwd: cwd, env: env)
61
- end
62
- end
63
-
64
- # A driver for running commands in a subprocess.
65
- class IntegrationDriver
66
-
67
- # The command to execute
68
- attr_accessor :cmd
69
-
70
- # The directory in which to execute the command.
71
- attr_accessor :cwd
72
-
73
- # The ID of the spawned subprocess (while it was running).
74
- attr_accessor :pid
75
-
76
- # The STDOUT of the spawned process.
77
- attr_accessor :stdout
78
-
79
- # The STDERR of the spawned process.
80
- attr_accessor :stderr
81
-
82
- # The exit code of the spawned process.
83
- attr_accessor :exit_code
84
-
85
- # Run the command and capture its outputs and exit code.
86
- #
87
- # @return [true, false]
88
- def run!
89
- return false if ran?
90
- FileUtils.cd(cwd) do
91
- Open3.popen3(env, cmd) do |i, o, e, wait_thr|
92
- self.pid = wait_thr.pid
93
-
94
- @inputs.each { |input| i.puts(input) }
95
- i.close
96
-
97
- self.stdout = o.read
98
- self.stderr = e.read
99
- self.exit_code = wait_thr.value.to_i
100
- end
101
- end
102
- @ran = true
103
- end
104
-
105
- # Initialize a new IntegrationDriver to run a given command.
106
- def initialize args, options
107
- @args = args
108
- @env = options[:env]
109
- @cwd = options[:cwd]
110
- @inputs = []
111
- end
112
-
113
- def cmd
114
- @args.compact.map(&:to_s).join(' ')
115
- end
116
-
117
- def on *events
118
- @inputs.concat(events)
119
- self
120
- end
121
- alias_method :<, :on
122
-
123
- def in dir
124
- @cwd = dir
125
- self
126
- end
127
-
128
- def using env
129
- @env = env
130
- self
131
- end
132
-
133
- def env
134
- ENV.to_hash.merge(@env || {})
135
- end
136
-
137
- def ran?
138
- @ran
139
- end
140
-
141
- def cmd_summary
142
- [
143
- cmd,
144
- "with env #{env_summary}",
145
- "in dir #{cwd}"
146
- ].join("\n")
147
- end
148
-
149
- def env_summary
150
- { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
151
- end
152
-
153
- end
154
- end
155
- end
156
-
157
-
@@ -1,89 +0,0 @@
1
- module Wukong
2
- module SpecHelpers
3
- module ProcessorHelpers
4
-
5
- # Creates a new processor in a variety of convenient ways.
6
- #
7
- # Most simply, called without args, will return a new instance of
8
- # a the klass named in the containing `describe` or `context`:
9
- #
10
- # context MyApp::Tokenizer do
11
- # it "uses whitespace as the default separator between tokens" do
12
- # processor.separator.should == /\s+/
13
- # end
14
- # end
15
- #
16
- # if your processor has been registered (you created it with the
17
- # <tt>Wukong.processor</tt> helper method or otherwise
18
- # registered it yourself) then you can use its name:
19
- #
20
- # context :tokenizer do
21
- # it "uses whitespace as the default separator between tokens" do
22
- # processor.separator.should == /\s+/
23
- # end
24
- # end
25
- #
26
- # The `processor` method can also be used inside RSpec's
27
- # `subject` and `let` methods:
28
- #
29
- # context "with no arguments" do
30
- # subject { processor }
31
- # it "uses whitespace as the default separator between tokens" do
32
- # separator.should == /\s+/
33
- # end
34
- # end
35
- # end
36
- #
37
- # and you can easily pass arguments, just like you would on the
38
- # command line or in a dataflow definition:
39
- #
40
- # context "with arguments" do
41
- # subject { processor(separator: ' ') }
42
- # it "uses whitespace as the default separator between tokens" do
43
- # separator.should == ' '
44
- # end
45
- # end
46
- # end
47
- #
48
- # You can even name the processor directly if you want to:
49
- #
50
- # context "tokenizers" do
51
- # let(:default_tokenizer) { processor(:tokenizer) }
52
- # let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true) }
53
- # let(:french_tokenizer) { processor(:complex_tokenizer, stemming: true) }
54
- # ...
55
- # end
56
- def processor *args, &block
57
- options = args.extract_options!
58
- name = args.first || self.class.description
59
- create_processor(name, options, &block)
60
- end
61
- alias_method :flow, :processor
62
-
63
- # Is the given +klass+ a Wukong::Processor?
64
- #
65
- # @param [Class] klass
66
- # @return [true, false]
67
- def processor? klass
68
- klass.build.is_a?(Processor)
69
- end
70
-
71
- # :nodoc:
72
- def create_processor name_or_klass, options={}, &block
73
- if name_or_klass.is_a?(Class)
74
- klass = name_or_klass
75
- else
76
- klass = Wukong.registry.retrieve(name_or_klass.to_s.to_sym)
77
- raise Error.new("Could not find a Wukong::Processor class named '#{name_or_klass}'") if klass.nil?
78
- end
79
- raise Error.new("#{klass} is not a subclass of Wukong::Processor") unless processor?(klass)
80
- settings = Configliere::Param.new
81
- Wukong.boot!(settings)
82
- proc = klass.build(settings.merge(options))
83
- proc.setup
84
- proc.instance_eval(&block) if block_given?
85
- proc
86
- end
87
- end
88
- end
89
- end
@@ -1,28 +0,0 @@
1
- module Wukong
2
- module SpecHelpers
3
- class SpecDriver < Array
4
-
5
- attr_reader :processor
6
-
7
- def initialize processor
8
- super()
9
- @processor = processor
10
- end
11
-
12
- def run
13
- return false unless processor
14
- processor.given_records.each do |input|
15
- processor.process(input) do |output|
16
- self << output
17
- end
18
- end
19
- processor.finalize do |output|
20
- self << output
21
- end
22
- processor.stop
23
- self
24
- end
25
-
26
- end
27
- end
28
- end
@@ -1,31 +0,0 @@
1
- require 'spec_helper'
2
- # require 'wukong'
3
- # require 'wukong/local_runner'
4
-
5
- # describe Wukong::LocalRunner, :examples_spec => true, :helpers => true do
6
-
7
- # context 'examples' do
8
-
9
- # subject{
10
- # test_sink = test_sink()
11
- # Wukong.dataflow(:integers) do
12
- # input :default, Wukong::Source::Integers.new(:size => 100)
13
- # output :default, test_sink
14
-
15
- # input(:default) >
16
- # map(&:to_s) >
17
- # re(/..+/) >
18
- # map(&:reverse) >
19
- # limit(20) >
20
- # output(:default)
21
- # end
22
- # Wukong::LocalRunner.receive(:flow => Wukong.dataflow(:integers))
23
- # }
24
-
25
- # it 'runs' do
26
- # subject.run(:default)
27
- # subject.flow.output(:default).records.should == %w[01 11 21 31 41 51 61 71 81 91 02 12 22 32 42 52 62 72 82 92]
28
- # end
29
-
30
- # end
31
- # end
@@ -1,125 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe 'wu-local' do
4
-
5
- let(:input) { %w[1 2 3] }
6
-
7
- context "without any arguments" do
8
- subject { command('wu-local') }
9
- it {should exit_with(:non_zero) }
10
- it "displays help on STDERR" do
11
- should have_stderr("usage: wu-local")
12
- end
13
- end
14
-
15
- context "running outside any Ruby project" do
16
- subject { command('wu-local count').in(examples_dir('empty')) < input }
17
- it { should exit_with(0) }
18
- it "runs the processor" do
19
- should have_stdout("3")
20
- end
21
- context "when passed a BUNDLE_GEMFILE" do
22
- context "that doesn't belong to a deploy pack" do
23
- subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
24
- it { should exit_with(0) }
25
- it "runs the processor" do
26
- should have_stdout("3")
27
- end
28
- end
29
- context "that belongs to a deploy pack" do
30
- subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
31
- it { should exit_with(0) }
32
- it "runs the processor" do
33
- should have_stdout("3")
34
- end
35
- context "loading the deploy pack" do
36
- subject { command('wu-local string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
37
- it { should exit_with(0) }
38
- it "runs the processor" do
39
- should have_stdout("ih")
40
- end
41
- end
42
- end
43
- end
44
- end
45
-
46
- context "running within a Ruby project" do
47
- context "at its root" do
48
- subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
49
- it { should exit_with(0) }
50
- it "runs the processor" do
51
- should have_stdout("3")
52
- end
53
- end
54
- context "deep within it" do
55
- subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
56
- it { should exit_with(0) }
57
- it "runs the processor" do
58
- should have_stdout("3")
59
- end
60
- end
61
- end
62
-
63
- context "running within a deploy pack" do
64
- context "at its root" do
65
- subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
66
- it { should exit_with(0) }
67
- it "runs the processor" do
68
- should have_stdout("3")
69
- end
70
- context "loading the deploy pack" do
71
- subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
72
- it { should exit_with(0) }
73
- it "runs the processor" do
74
- should have_stdout("ih")
75
- end
76
- end
77
- end
78
- context "deep within it" do
79
- subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
80
- it { should exit_with(0) }
81
- it "runs the processor" do
82
- should have_stdout("3")
83
- end
84
- context "loading the deploy pack" do
85
- subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
86
- it { should exit_with(0) }
87
- it "runs the processor" do
88
- should have_stdout("ih")
89
- end
90
- end
91
- end
92
- end
93
-
94
- # context "running within a deploy pack" do
95
- # context "at its root" do
96
- # let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack')) }
97
- # end
98
- # context "deep within it" do
99
- # let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack', 'a','b','c')) }
100
- # end
101
- # end
102
-
103
- # context "in local mode" do
104
- # context "on a map-only job" do
105
- # let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
106
- # it { should exit_with(0) }
107
- # it { should have_stdout('Shall', 'I', 'compare', 'thee', 'to', 'a', "summer's", 'day') }
108
- # end
109
-
110
- # context "on a map-reduce job" do
111
- # let(:subject) { command('wu-hadoop', example_script('word_count.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
112
- # it { should exit_with(0) }
113
- # it { should have_stdout(/complexion\s+1/, /Death\s+1/, /temperate\s+1/) }
114
- # end
115
- # end
116
-
117
- # context "in Hadoop mode" do
118
- # context "on a map-only job" do
119
- # let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=hadoop", "--input=/data/in", "--output=/data/out", "--dry_run") }
120
- # it { should exit_with(0) }
121
- # it { should have_stdout(%r{jar.*hadoop.*streaming.*\.jar}, %r{-mapper.+tokenizer\.rb}, %r{-input.*/data/in}, %r{-output.*/data/out}) }
122
- # end
123
- # end
124
-
125
- end