wukong 3.0.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/README.md +253 -45
- data/bin/wu +34 -0
- data/bin/wu-source +5 -0
- data/examples/Gemfile +0 -1
- data/examples/deploy_pack/Gemfile +0 -1
- data/examples/improver/tweet_summary.rb +73 -0
- data/examples/ruby_project/Gemfile +0 -1
- data/examples/splitter.rb +94 -0
- data/examples/twitter.rb +5 -0
- data/lib/hanuman.rb +1 -1
- data/lib/hanuman/graph.rb +39 -22
- data/lib/hanuman/stage.rb +46 -13
- data/lib/hanuman/tree.rb +67 -0
- data/lib/wukong.rb +6 -1
- data/lib/wukong/dataflow.rb +19 -48
- data/lib/wukong/driver.rb +176 -65
- data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
- data/lib/wukong/driver/wiring.rb +68 -0
- data/lib/wukong/local.rb +6 -4
- data/lib/wukong/local/runner.rb +14 -16
- data/lib/wukong/local/stdio_driver.rb +72 -12
- data/lib/wukong/processor.rb +1 -30
- data/lib/wukong/runner.rb +2 -0
- data/lib/wukong/runner/command_runner.rb +44 -0
- data/lib/wukong/source.rb +33 -0
- data/lib/wukong/source/source_driver.rb +74 -0
- data/lib/wukong/source/source_runner.rb +38 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
- data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
- data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/echo.rb +55 -0
- data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
- data/lib/wukong/widget/filters.rb +15 -0
- data/lib/wukong/widget/logger.rb +56 -0
- data/lib/wukong/widget/operators.rb +82 -0
- data/lib/wukong/widget/reducers.rb +2 -0
- data/lib/wukong/widget/reducers/improver.rb +71 -0
- data/lib/wukong/widget/reducers/join_xml.rb +37 -0
- data/lib/wukong/widget/serializers.rb +21 -6
- data/lib/wukong/widgets.rb +6 -3
- data/spec/hanuman/graph_spec.rb +73 -10
- data/spec/hanuman/stage_spec.rb +15 -0
- data/spec/hanuman/tree_spec.rb +119 -0
- data/spec/spec_helper.rb +13 -1
- data/spec/support/example_test_helpers.rb +0 -1
- data/spec/support/model_test_helpers.rb +1 -1
- data/spec/support/shared_context_for_graphs.rb +57 -0
- data/spec/support/shared_examples_for_builders.rb +8 -15
- data/spec/wukong/driver_spec.rb +152 -0
- data/spec/wukong/local/runner_spec.rb +1 -12
- data/spec/wukong/local/stdio_driver_spec.rb +73 -0
- data/spec/wukong/processor_spec.rb +0 -1
- data/spec/wukong/runner_spec.rb +2 -2
- data/spec/wukong/source_spec.rb +6 -0
- data/spec/wukong/widget/extract_spec.rb +101 -0
- data/spec/wukong/widget/logger_spec.rb +23 -0
- data/spec/wukong/widget/operators_spec.rb +25 -0
- data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
- data/spec/wukong/wu-source_spec.rb +32 -0
- data/spec/wukong/wu_spec.rb +14 -0
- data/wukong.gemspec +1 -2
- metadata +45 -28
- data/lib/wukong/local/tcp_driver.rb +0 -47
- data/spec/wu/geo/geolocated_spec.rb +0 -247
- data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -6,7 +6,6 @@ shared_examples_for 'a processor' do |options = {}|
|
|
6
6
|
it{ processor(options[:named]).processor.should respond_to(:process) }
|
7
7
|
it{ processor(options[:named]).processor.should respond_to(:finalize) }
|
8
8
|
it{ processor(options[:named]).processor.should respond_to(:stop) }
|
9
|
-
it{ processor(options[:named]).processor.should respond_to(:notify) }
|
10
9
|
end
|
11
10
|
|
12
11
|
shared_examples_for 'a plugin' do |options = {}|
|
@@ -99,13 +99,14 @@ module Wukong
|
|
99
99
|
# let(:french_tokenizer) { processor(:complex_tokenizer, stemming: true, language: 'fr') }
|
100
100
|
# ...
|
101
101
|
# end
|
102
|
-
def unit_test_runner *args
|
102
|
+
def unit_test_runner *args, &block
|
103
103
|
settings = args.extract_options!
|
104
104
|
name = (args.first || self.class.description)
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
105
|
+
UnitTestRunner.new(name, settings).tap do |the_runner|
|
106
|
+
the_runner.program_name = 'wu-local'
|
107
|
+
yield the_runner.driver.processor if block_given?
|
108
|
+
the_runner.boot!(settings)
|
109
|
+
end.driver
|
109
110
|
end
|
110
111
|
alias_method :processor, :unit_test_runner
|
111
112
|
|
@@ -4,22 +4,12 @@ module Wukong
|
|
4
4
|
|
5
5
|
include Wukong::DriverMethods
|
6
6
|
|
7
|
-
def initialize label, settings
|
7
|
+
def initialize label, settings={}
|
8
8
|
super()
|
9
|
-
|
10
|
-
@dataflow = construct_dataflow(label, settings)
|
9
|
+
construct_dataflow(label, settings)
|
11
10
|
setup_dataflow
|
12
11
|
end
|
13
12
|
|
14
|
-
def setup
|
15
|
-
end
|
16
|
-
|
17
|
-
def finalize
|
18
|
-
end
|
19
|
-
|
20
|
-
def stop
|
21
|
-
end
|
22
|
-
|
23
13
|
def process output
|
24
14
|
self << output
|
25
15
|
end
|
@@ -27,14 +17,14 @@ module Wukong
|
|
27
17
|
def run
|
28
18
|
return false unless dataflow
|
29
19
|
given_records.each do |input|
|
30
|
-
|
20
|
+
send_through_dataflow(input)
|
31
21
|
end
|
32
22
|
finalize_and_stop_dataflow
|
33
23
|
self
|
34
24
|
end
|
35
25
|
|
36
26
|
def processor
|
37
|
-
dataflow.
|
27
|
+
dataflow.root
|
38
28
|
end
|
39
29
|
|
40
30
|
# An array of accumulated records to process come match-time.
|
@@ -19,29 +19,28 @@ module Wukong
|
|
19
19
|
# of the unit test back into the test suite
|
20
20
|
class UnitTestRunner < Wukong::Local::LocalRunner
|
21
21
|
|
22
|
-
# The processor this runner will create in the same way as
|
23
|
-
# `wu-local`.
|
24
|
-
attr_accessor :processor
|
25
|
-
|
26
22
|
# Initialize a new UnitTestRunner for the processor with the
|
27
23
|
# given `label` and `settings`.
|
28
24
|
#
|
29
25
|
# @param [Symbol] label
|
30
26
|
# @param [Hash] settings
|
31
|
-
def initialize label, settings
|
32
|
-
|
27
|
+
def initialize label, settings={}
|
28
|
+
@dataflow = label
|
33
29
|
params = Configliere::Param.new
|
34
|
-
params.use(:commandline)
|
35
30
|
params.merge!(settings)
|
36
31
|
super(params)
|
37
32
|
end
|
38
33
|
|
34
|
+
def dataflow
|
35
|
+
@dataflow
|
36
|
+
end
|
37
|
+
|
39
38
|
# Override the LocalDriver with the UnitTestDriver so we can
|
40
39
|
# more easily pass in and retrieve processed records.
|
41
40
|
#
|
42
41
|
# @return [UnitTestDriver]
|
43
42
|
def driver
|
44
|
-
@driver ||= UnitTestDriver.new(
|
43
|
+
@driver ||= UnitTestDriver.new(dataflow, settings)
|
45
44
|
end
|
46
45
|
|
47
46
|
# No need to load commandline arguments when we are testing
|
data/lib/wukong/version.rb
CHANGED
@@ -0,0 +1,55 @@
|
|
1
|
+
module Wukong
|
2
|
+
class Processor
|
3
|
+
|
4
|
+
# A widget that yields whatever you instantiate it with.
|
5
|
+
#
|
6
|
+
# This is most useful when you have a small but predictable input
|
7
|
+
# that you don't want or can't pass via usual input channels like
|
8
|
+
# STDIN.
|
9
|
+
#
|
10
|
+
# @example Works just like you think on the command line
|
11
|
+
#
|
12
|
+
# $ echo something else | wu-local echo --input=hello
|
13
|
+
# hello
|
14
|
+
#
|
15
|
+
# @example Pass some fixed input to your downstream code.
|
16
|
+
#
|
17
|
+
# # my_flow.rb
|
18
|
+
# Wukong.dataflow(:my_flow) do
|
19
|
+
# echo(input: {key: 'value'}) | my_proc | ...
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# This differs from the `:identity` processor because it
|
23
|
+
# doesn't pass on what it receives but what you instantiate it
|
24
|
+
# with.
|
25
|
+
#
|
26
|
+
# @see Identity
|
27
|
+
class Echo < Processor
|
28
|
+
|
29
|
+
description <<EOF
|
30
|
+
A widget that yields whatever you instantiate it with.
|
31
|
+
|
32
|
+
This is most useful when you have a small but predictable input
|
33
|
+
that you don't want or can't pass via usual input channels like
|
34
|
+
STDIN.
|
35
|
+
|
36
|
+
Works just like you think on the command line (the process won't terminate)
|
37
|
+
|
38
|
+
$ echo something else | wu-local echo --input=hello
|
39
|
+
hello
|
40
|
+
EOF
|
41
|
+
|
42
|
+
field :input, Whatever, :default => nil, :doc => "The record to echo"
|
43
|
+
|
44
|
+
# Yields the `input` no matter what you pass it.
|
45
|
+
#
|
46
|
+
# @param [Object] _ the new input record which is ignored
|
47
|
+
# @yield [input]
|
48
|
+
# @yieldparam [Object] input the original input
|
49
|
+
def process _
|
50
|
+
yield input
|
51
|
+
end
|
52
|
+
register
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -3,58 +3,6 @@ require_relative('utils')
|
|
3
3
|
module Wukong
|
4
4
|
class Processor
|
5
5
|
|
6
|
-
# A widget that will log all incoming records.
|
7
|
-
#
|
8
|
-
# @example Logging records from the command line
|
9
|
-
#
|
10
|
-
# $ cat input
|
11
|
-
# 1
|
12
|
-
# 2
|
13
|
-
# 3
|
14
|
-
# $ cat input | wu-local logger
|
15
|
-
# 2012-11-28 18:20:46 [INFO] Logger: 1
|
16
|
-
# 2012-11-28 18:20:46 [INFO] Logger: 2
|
17
|
-
# 2012-11-28 18:20:46 [INFO] Logger: 3
|
18
|
-
#
|
19
|
-
# @example Logging records within a dataflow
|
20
|
-
#
|
21
|
-
# Wukong.dataflow(:uses_logger) do
|
22
|
-
# ... | logger
|
23
|
-
# end
|
24
|
-
class Logger < Processor
|
25
|
-
field :level, Symbol, :default => :info, :doc => "Log level priority"
|
26
|
-
|
27
|
-
description <<EOF
|
28
|
-
This processor passes all input records unmodified, making a log
|
29
|
-
statement on each one.
|
30
|
-
|
31
|
-
$ cat input
|
32
|
-
1
|
33
|
-
2
|
34
|
-
3
|
35
|
-
$ cat input | wu-local logger
|
36
|
-
INFO 2013-01-04 17:10:59 [Logger ] -- 1
|
37
|
-
INFO 2013-01-04 17:10:59 [Logger ] -- 2
|
38
|
-
INFO 2013-01-04 17:10:59 [Logger ] -- 3
|
39
|
-
|
40
|
-
You can set the priority level of the log messages with the --level
|
41
|
-
flag.
|
42
|
-
|
43
|
-
$ cat input | wu-local logger --level=debug
|
44
|
-
DEBUG 2013-01-04 17:10:59 [Logger ] -- 1
|
45
|
-
DEBUG 2013-01-04 17:10:59 [Logger ] -- 2
|
46
|
-
DEBUG 2013-01-04 17:10:59 [Logger ] -- 3
|
47
|
-
EOF
|
48
|
-
|
49
|
-
# Process a given `record` by logging it.
|
50
|
-
#
|
51
|
-
# @param [Object] record
|
52
|
-
def process(record)
|
53
|
-
log.send(level, record)
|
54
|
-
end
|
55
|
-
register
|
56
|
-
end
|
57
|
-
|
58
6
|
# A widget that extracts parts of incoming records.
|
59
7
|
#
|
60
8
|
# This widget can extract part of the following kinds of objects:
|
@@ -170,59 +118,5 @@ EOF
|
|
170
118
|
end
|
171
119
|
register
|
172
120
|
end
|
173
|
-
|
174
|
-
class Topic < Processor
|
175
|
-
|
176
|
-
field :topic, Symbol, :doc => "Topic to label the record with"
|
177
|
-
|
178
|
-
def process(record)
|
179
|
-
yield perform_action(record)
|
180
|
-
end
|
181
|
-
|
182
|
-
def perform_action(record)
|
183
|
-
assign_topic(record, topic)
|
184
|
-
end
|
185
|
-
|
186
|
-
def assign_topic(record, topic_name)
|
187
|
-
record.define_singleton_method(:topic){ topic_name }
|
188
|
-
record
|
189
|
-
end
|
190
|
-
register
|
191
|
-
end
|
192
|
-
|
193
|
-
# Until further notice, this processor is unusable due to the invocation of yield
|
194
|
-
# class Foreach < Processor
|
195
|
-
# def process(record, &blk)
|
196
|
-
# perform_action(record, &blk)
|
197
|
-
# end
|
198
|
-
# register
|
199
|
-
# end
|
200
|
-
|
201
|
-
class Map < Processor
|
202
|
-
def process(record)
|
203
|
-
yield perform_action(record)
|
204
|
-
end
|
205
|
-
register
|
206
|
-
end
|
207
|
-
|
208
|
-
class Flatten < Processor
|
209
|
-
def process(records)
|
210
|
-
records.respond_to?(:each) ? records.each{ |record| yield(record) } : yield(records)
|
211
|
-
end
|
212
|
-
register
|
213
|
-
end
|
214
|
-
|
215
|
-
# Mixin processor behavior
|
216
|
-
module BufferedProcessor
|
217
|
-
def setup() ; end
|
218
|
-
def process(record) @buffer << record ; end
|
219
|
-
def stop() ; end
|
220
|
-
end
|
221
|
-
|
222
|
-
module StdoutProcessor
|
223
|
-
def setup() $stdout.sync ; end
|
224
|
-
def process(record) $stdout.puts record ; end
|
225
|
-
def stop() ; end
|
226
|
-
end
|
227
121
|
end
|
228
122
|
end
|
@@ -432,6 +432,21 @@ EOF
|
|
432
432
|
end
|
433
433
|
register
|
434
434
|
end
|
435
|
+
|
436
|
+
# Select a record only if it is non-nil.
|
437
|
+
#
|
438
|
+
# @see Filter
|
439
|
+
class Compact < Filter
|
440
|
+
|
441
|
+
# Select a record only if it is non-nil.
|
442
|
+
#
|
443
|
+
# @param [Object] record
|
444
|
+
# @return [true, false]
|
445
|
+
def select?(record)
|
446
|
+
! record.nil?
|
447
|
+
end
|
448
|
+
register
|
449
|
+
end
|
435
450
|
|
436
451
|
end
|
437
452
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Wukong
|
2
|
+
class Processor
|
3
|
+
|
4
|
+
# A widget that will log all incoming records.
|
5
|
+
#
|
6
|
+
# @example Logging records from the command line
|
7
|
+
#
|
8
|
+
# $ cat input
|
9
|
+
# 1
|
10
|
+
# 2
|
11
|
+
# 3
|
12
|
+
# $ cat input | wu-local logger
|
13
|
+
# 2012-11-28 18:20:46 [INFO] Logger: 1
|
14
|
+
# 2012-11-28 18:20:46 [INFO] Logger: 2
|
15
|
+
# 2012-11-28 18:20:46 [INFO] Logger: 3
|
16
|
+
#
|
17
|
+
# @example Logging records within a dataflow
|
18
|
+
#
|
19
|
+
# Wukong.dataflow(:uses_logger) do
|
20
|
+
# ... | logger
|
21
|
+
# end
|
22
|
+
class Logger < Processor
|
23
|
+
field :level, Symbol, :default => :info, :doc => "Log level priority"
|
24
|
+
|
25
|
+
description <<EOF
|
26
|
+
This processor passes all input records unmodified, making a log
|
27
|
+
statement on each one.
|
28
|
+
|
29
|
+
$ cat input
|
30
|
+
1
|
31
|
+
2
|
32
|
+
3
|
33
|
+
$ cat input | wu-local logger
|
34
|
+
INFO 2013-01-04 17:10:59 [Logger ] -- 1
|
35
|
+
INFO 2013-01-04 17:10:59 [Logger ] -- 2
|
36
|
+
INFO 2013-01-04 17:10:59 [Logger ] -- 3
|
37
|
+
|
38
|
+
You can set the priority level of the log messages with the --level
|
39
|
+
flag.
|
40
|
+
|
41
|
+
$ cat input | wu-local logger --level=debug
|
42
|
+
DEBUG 2013-01-04 17:10:59 [Logger ] -- 1
|
43
|
+
DEBUG 2013-01-04 17:10:59 [Logger ] -- 2
|
44
|
+
DEBUG 2013-01-04 17:10:59 [Logger ] -- 3
|
45
|
+
EOF
|
46
|
+
|
47
|
+
# Process a given `record` by logging it.
|
48
|
+
#
|
49
|
+
# @param [Object] record
|
50
|
+
def process(record)
|
51
|
+
log.send(level, record)
|
52
|
+
end
|
53
|
+
register
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Wukong
|
2
|
+
class Processor
|
3
|
+
|
4
|
+
# Yield the result of this processor's action for each input
|
5
|
+
# record.
|
6
|
+
#
|
7
|
+
# @example Apply a function (like a parser) to each record
|
8
|
+
#
|
9
|
+
# Wukong.dataflow(:parser) do
|
10
|
+
# ... | map { |string| MyParser.parse(string) } | ...
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# @example Succinctly map between objects
|
14
|
+
#
|
15
|
+
# Wukong.dataflow(:converter) do
|
16
|
+
# ... | my_book_parser | map(&:author) | author_processor | ...
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
# Can also be called with the :compact option which will check if
|
20
|
+
# the result of the action is non falsy before yielding.
|
21
|
+
#
|
22
|
+
# @example Mapping but only if it exists
|
23
|
+
#
|
24
|
+
# Wukong.dataflow(:converter_and_trimmer) do
|
25
|
+
# ... | my_book_parser | map(compact: true, &:author) | processor_that_needs_an_author | ...
|
26
|
+
# end
|
27
|
+
class Map < Processor
|
28
|
+
|
29
|
+
field :compact, :boolean, default: false
|
30
|
+
|
31
|
+
# Call #perform_action on the input_record and yield the
|
32
|
+
# returned output record.
|
33
|
+
#
|
34
|
+
# If #compact then only yield the output record if it is not
|
35
|
+
# falsy.
|
36
|
+
#
|
37
|
+
# @param [Object] input_record
|
38
|
+
# @yield [output_record] if compact, then only yield if it is not falsy
|
39
|
+
# @yieldparam [Object] output_record the result of #perform_action
|
40
|
+
#
|
41
|
+
# @see Flatten
|
42
|
+
def process(input_record)
|
43
|
+
output_record = perform_action(input_record)
|
44
|
+
if compact
|
45
|
+
yield output_record if output_record
|
46
|
+
else
|
47
|
+
yield output_record
|
48
|
+
end
|
49
|
+
end
|
50
|
+
register
|
51
|
+
end
|
52
|
+
|
53
|
+
# If an input record defines the #each method then yield each of
|
54
|
+
# its records. Otherwise yield the input record.
|
55
|
+
#
|
56
|
+
# @example Turning one record into many
|
57
|
+
#
|
58
|
+
# Wukong.dataflow(:authors_to_books) do
|
59
|
+
# ... | author_parser | map(&:books) | flatten | book_processor | ...
|
60
|
+
# end
|
61
|
+
#
|
62
|
+
# @see Map
|
63
|
+
class Flatten < Processor
|
64
|
+
|
65
|
+
# If input_record responds to #each then yield each of these as
|
66
|
+
# an output record. Else, just yield the input_record.
|
67
|
+
#
|
68
|
+
# @param [Object] input_record
|
69
|
+
# @yield [output_record]
|
70
|
+
# @yieldparam [Object] output_record
|
71
|
+
def process(input_record)
|
72
|
+
if input_record.respond_to?(:each)
|
73
|
+
input_record.each{ |output_record| yield(output_record) }
|
74
|
+
else
|
75
|
+
yield(input_record)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
register
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
end
|