wukong 3.0.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/README.md +253 -45
- data/bin/wu +34 -0
- data/bin/wu-source +5 -0
- data/examples/Gemfile +0 -1
- data/examples/deploy_pack/Gemfile +0 -1
- data/examples/improver/tweet_summary.rb +73 -0
- data/examples/ruby_project/Gemfile +0 -1
- data/examples/splitter.rb +94 -0
- data/examples/twitter.rb +5 -0
- data/lib/hanuman.rb +1 -1
- data/lib/hanuman/graph.rb +39 -22
- data/lib/hanuman/stage.rb +46 -13
- data/lib/hanuman/tree.rb +67 -0
- data/lib/wukong.rb +6 -1
- data/lib/wukong/dataflow.rb +19 -48
- data/lib/wukong/driver.rb +176 -65
- data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
- data/lib/wukong/driver/wiring.rb +68 -0
- data/lib/wukong/local.rb +6 -4
- data/lib/wukong/local/runner.rb +14 -16
- data/lib/wukong/local/stdio_driver.rb +72 -12
- data/lib/wukong/processor.rb +1 -30
- data/lib/wukong/runner.rb +2 -0
- data/lib/wukong/runner/command_runner.rb +44 -0
- data/lib/wukong/source.rb +33 -0
- data/lib/wukong/source/source_driver.rb +74 -0
- data/lib/wukong/source/source_runner.rb +38 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
- data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
- data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/echo.rb +55 -0
- data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
- data/lib/wukong/widget/filters.rb +15 -0
- data/lib/wukong/widget/logger.rb +56 -0
- data/lib/wukong/widget/operators.rb +82 -0
- data/lib/wukong/widget/reducers.rb +2 -0
- data/lib/wukong/widget/reducers/improver.rb +71 -0
- data/lib/wukong/widget/reducers/join_xml.rb +37 -0
- data/lib/wukong/widget/serializers.rb +21 -6
- data/lib/wukong/widgets.rb +6 -3
- data/spec/hanuman/graph_spec.rb +73 -10
- data/spec/hanuman/stage_spec.rb +15 -0
- data/spec/hanuman/tree_spec.rb +119 -0
- data/spec/spec_helper.rb +13 -1
- data/spec/support/example_test_helpers.rb +0 -1
- data/spec/support/model_test_helpers.rb +1 -1
- data/spec/support/shared_context_for_graphs.rb +57 -0
- data/spec/support/shared_examples_for_builders.rb +8 -15
- data/spec/wukong/driver_spec.rb +152 -0
- data/spec/wukong/local/runner_spec.rb +1 -12
- data/spec/wukong/local/stdio_driver_spec.rb +73 -0
- data/spec/wukong/processor_spec.rb +0 -1
- data/spec/wukong/runner_spec.rb +2 -2
- data/spec/wukong/source_spec.rb +6 -0
- data/spec/wukong/widget/extract_spec.rb +101 -0
- data/spec/wukong/widget/logger_spec.rb +23 -0
- data/spec/wukong/widget/operators_spec.rb +25 -0
- data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
- data/spec/wukong/wu-source_spec.rb +32 -0
- data/spec/wukong/wu_spec.rb +14 -0
- data/wukong.gemspec +1 -2
- metadata +45 -28
- data/lib/wukong/local/tcp_driver.rb +0 -47
- data/spec/wu/geo/geolocated_spec.rb +0 -247
- data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative("reducers/accumulator")
|
2
|
+
require_relative("reducers/improver")
|
2
3
|
require_relative("reducers/sort")
|
3
4
|
require_relative("reducers/count")
|
4
5
|
require_relative("reducers/group")
|
@@ -6,3 +7,4 @@ require_relative("reducers/group_concat")
|
|
6
7
|
require_relative("reducers/moments")
|
7
8
|
require_relative("reducers/bin")
|
8
9
|
require_relative("reducers/uniq")
|
10
|
+
require_relative("reducers/join_xml")
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Wukong
  class Processor

    # A base widget for building more complex improver widgets.
    #
    # Every incoming record is a tab-delimited string whose first field
    # names the operation to perform:
    #
    #   zero                      -- reset the group and emit the empty group
    #   accumulate<TAB>f1<TAB>f2  -- append the remaining fields to the group
    #   improve<TAB>prev          -- emit improve(prev, group), then reset
    #
    # The base implementation of #improve is empty (it returns nil); it
    # looks intended to be overridden by subclasses.
    class Improver < Processor

      # The current group of records.
      attr_accessor :group

      # Sets up this improver by defining an initial key (with a
      # value that is unlikely to be found in real data) and calling
      # `#zero` with no record.
      def setup
        @key = :__first_group__
        zero
      end

      # Splits a raw record into its tab-delimited fields.
      #
      # @param [String] record
      # @return [Array<String>]
      def recordize(record)
        record.split("\t")
      end

      # Extracts the name of the operation to perform.
      #
      # All kinds of assumptions here: the record is tab-delimited and
      # the first field is the name of a function to call.
      #
      # @param [Array<String>] record the fields returned by #recordize
      # @return [String, nil] the first field, or nil when there are none
      def get_function(record)
        record.first
      end

      # Processes the `record` by dispatching on its first field.
      #
      # @param [String] record a tab-delimited line
      # @yield [Object] the return value of #zero or #improve
      # @raise [NoMethodError] when the first field is not one of
      #   'zero', 'accumulate' or 'improve'
      def process(record)
        fields = recordize(record)
        func   = get_function(fields)
        case func
        when 'zero'
          yield zero
        when 'accumulate'
          accumulate(fields[1..-1])
        when 'improve'
          yield improve(fields[1], group)
          # A fresh array is assigned, so the group just handed to
          # #improve is not mutated afterwards.
          self.group = []
        else
          raise NoMethodError, "undefined method #{func} for Improver"
        end
        # Flush after every record. Uses the $stdout global rather than
        # the STDOUT constant so a reassigned output stream is the one
        # that gets flushed.
        $stdout.flush
      end

      # Starts accumulation for a new key. Return what you would
      # with no improvements.
      #
      # @return [Array] the new, empty group
      def zero
        self.group = []
      end

      # Accumulates another +record+.
      #
      # @param [Object] record
      def accumulate(record)
        group << record
      end

      # Improve +prev+ with +group+.
      #
      # The base implementation does nothing and returns nil.
      #
      # @param [Object] prev the second field of the 'improve' record
      # @param [Array] group the records accumulated so far
      def improve(prev, group)
      end

    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Wukong
  class Processor

    # Joins XML input data based on a root tag.
    #
    # Lines are buffered until one contains the closing tag of the
    # +root+ element; the buffered lines are then joined with newlines
    # and yielded as a single record.
    class JoinXML < Processor

      field :root, String, default: 'xml', doc: "Name of the root XML element"

      # Starts with an empty buffer of pending lines.
      def setup
        @lines = []
      end

      # Buffers +line+ and yields one joined document for every closing
      # root tag found in it.
      #
      # A single line may close several documents; each is emitted in
      # turn. Text after the last closing tag is kept as the start of
      # the next document.
      #
      # @param [String] line
      # @yield [String] the buffered lines of one document joined by "\n"
      def process(line)
        remainder = line
        matched   = false
        while (match = terminator.match(remainder))
          matched = true
          @lines << remainder[0...match.end(0)]
          yield @lines.join("\n")
          @lines = []
          remainder = remainder[match.end(0)..-1]
        end
        # A line without any terminator is buffered as-is (even when
        # empty); after a terminator only non-empty leftovers are kept.
        @lines << remainder unless matched && remainder.empty?
      end

      # The case-insensitive pattern matching the closing tag of the
      # root element, tolerating whitespace inside the tag.
      #
      # The root name is escaped so characters such as '.' in an element
      # name are matched literally.
      #
      # @return [Regexp]
      def terminator
        %r{<\s*/\s*#{Regexp.escape(root.to_s)}\s*>}i
      end

      register :join_xml
    end
  end
end
|
36
|
+
|
37
|
+
|
@@ -4,13 +4,11 @@ module Wukong
|
|
4
4
|
SerializerError = Class.new(Error)
|
5
5
|
|
6
6
|
class Serializer < Processor
|
7
|
-
field :on_error, String, default: 'log', :doc => "Action to take upon an error, either 'log' or 'notify'"
|
8
7
|
|
9
8
|
# Logs an error raised while handling +record+.
#
# Broken-pipe errors are ignored silently (presumably because the
# reader of our output has gone away -- confirm against callers).
# Any other error is logged at error level, with its backtrace at
# debug level.
#
# @param [Object] record the record being handled (currently unused)
# @param [Exception] err the error that was rescued
def handle_error(record, err)
  return if err.is_a?(Errno::EPIPE)
  log.error "#{err.class}: #{err.message}"
  # Exception#backtrace is nil for an exception that was never raised.
  Array(err.backtrace).each { |line| log.debug(line) }
end
|
15
13
|
|
16
14
|
end
|
@@ -292,10 +290,27 @@ EOF
|
|
292
290
|
# Turns a Hash-like +record+ into a model instance and yields it.
#
# The record's wire format (its #to_wire value when that is available,
# otherwise the record itself) must be a Hash. The model class is
# looked up with #model_class_for; when none is found an error is
# logged and nothing is yielded. Any StandardError raised along the
# way is passed to #handle_error.
#
# @param [Object] record
# @yield [Object] the result of calling +receive+ on the model class
def process(record)
  wire_format = record.try(:to_wire) || record
  unless wire_format.is_a?(Hash)
    raise SerializerError, "Can only recordize a Hash-like record"
  end
  if (klass = model_class_for(wire_format))
    yield klass.receive(wire_format)
  else
    log.error("No default model class and no explicit model for: #{wire_format.inspect}")
  end
rescue => error
  handle_error(record, error)
end
|
302
|
+
|
303
|
+
# Picks the model class to instantiate for +record+.
#
# An explicit type named under the `:_type` or `"_type"` key wins when
# it resolves to a constant; otherwise a warning is logged and the
# value of +model+ is used.
#
# NOTE(review): the type name comes from the record itself and is
# passed to `constantize`, so a record can name any loaded constant.
# Consider restricting this to known model classes when records come
# from untrusted input.
#
# @param [Hash] record
# @return [Class, nil] the resolved class, or the value of +model+
#   (which may be nil)
def model_class_for(record)
  explicit_type = record[:_type] || record["_type"]
  if explicit_type
    begin
      return explicit_type.constantize
    rescue NameError
      log.warn("Could not find a class for <#{explicit_type}>")
    end
  end
  model
end
|
313
|
+
|
299
314
|
register
|
300
315
|
end
|
301
316
|
end
|
data/lib/wukong/widgets.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
require 'wukong/widget/processors'
|
2
|
-
require 'wukong/widget/reducers'
|
3
|
-
require 'wukong/widget/serializers'
|
4
1
|
require 'wukong/widget/filters'
|
2
|
+
require 'wukong/widget/serializers'
|
3
|
+
require 'wukong/widget/operators'
|
4
|
+
require 'wukong/widget/reducers'
|
5
|
+
require 'wukong/widget/extract'
|
6
|
+
require 'wukong/widget/logger'
|
7
|
+
require 'wukong/widget/echo'
|
data/spec/hanuman/graph_spec.rb
CHANGED
@@ -2,17 +2,80 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
# Specs for Hanuman::Graph. The `graph` fixture comes from the shared
# "graphs" context.
describe Hanuman::Graph, :hanuman => true do

  include_context "graphs"

  context "#ancestors" do
    context "called without any arguments" do
      it "returns all stages with an ancestor" do
        graph.ancestors.size.should == 4
      end
    end

    context "called with a stage as the first argument" do
      it "returns the immediate ancestors of the stage" do
        graph.ancestors(graph.stages[:second]).size.should == 2
      end
    end
  end

  context "#descendents" do
    context "called without any arguments" do
      it "returns all stages with a descendent" do
        graph.descendents.size.should == 4
      end
    end

    context "called with a stage as the first argument" do
      it "returns the immediate descendents of the stage" do
        graph.descendents(graph.stages[:second]).size.should == 2
      end
    end
  end

  context "#add_stage" do
    let(:stage) { Hanuman::Stage.receive(label: :orphan) }
    it "adds the stage to the graph" do
      expect { graph.add_stage(stage) }.to change { graph.stages[:orphan] }.from(nil).to(stage)
    end
    it "doesn't create any links" do
      expect { graph.add_stage(stage) }.to_not change { graph.links }
    end
  end

  context "#add_link" do
    context "when adding a new stage" do
      let(:from) { graph.stages[:fourth] }
      let(:into) { Hanuman::Stage.receive(label: :fifth) }
      it "adds the new stage to the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.stages[:fifth] }.from(nil).to(into)
      end
      it "adds the new link to the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when adding an existing link" do
      let(:from) { graph.stages[:third_a] }
      let(:into) { graph.stages[:fourth] }
      it "duplicates the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when making a cycle" do
      let(:from) { graph.stages[:fourth] }
      let(:into) { graph.stages[:first_a] }
      it "adds the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when linking a stage to itself" do
      # Both ends come from the graph under test, not from the `tree`
      # fixture, so the example really links a graph stage to itself.
      let(:from) { graph.stages[:fourth] }
      let(:into) { graph.stages[:fourth] }
      it "adds the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
  end

end
|
17
80
|
|
18
81
|
describe Hanuman::GraphBuilder, :hanuman => true do
|
data/spec/hanuman/stage_spec.rb
CHANGED
@@ -63,4 +63,19 @@ describe Hanuman::StageBuilder, :hanuman => true do
|
|
63
63
|
subject.serialize.should_not include(:links)
|
64
64
|
end
|
65
65
|
end
|
66
|
+
|
67
|
+
context '#into' do
|
68
|
+
subject { described_class.receive(label: :pyro) }
|
69
|
+
let(:other_stage){ described_class.receive(label: :iceman) }
|
70
|
+
|
71
|
+
it 'returns the linked into stage for chaining' do
|
72
|
+
subject.into(other_stage).should be(other_stage)
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'links two stages together with a simple link' do
|
76
|
+
subject.into(other_stage)
|
77
|
+
subject.links.should be_any{ |link| link.from == :pyro and link.into == :iceman }
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
66
81
|
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Hanuman::Tree. The `tree` and `single_stage_tree` fixtures
# come from the shared "graphs" context.
describe Hanuman::Tree do

  include_context "graphs"

  it "iterates over each stage in tree-order" do
    tree.directed_sort.should == [:first, :second, :third_b, :third_a, :fourth]
  end

  context "#root" do
    context "when called without arguments" do
      it "returns the root of the whole tree" do
        tree.root.should_not be_nil
        tree.root.label.should == :first
      end
      it "returns the root of a single stage tree" do
        single_stage_tree.root.should_not be_nil
        single_stage_tree.root.label.should == :first
      end
    end
    context "when called with a stage" do
      it "should return the root of the whole tree" do
        tree.root(tree.stages[:fourth]).should_not be_nil
        tree.root(tree.stages[:fourth]).label.should == :first
      end
    end
  end

  context "#ancestor" do
    it "returns the ancestor of a stage" do
      tree.ancestor(tree.stages[:fourth]).should_not be_nil
      tree.ancestor(tree.stages[:fourth]).label.should == :third_a
    end
    it "returns nil for the root" do
      tree.ancestor(tree.root).should be_nil
    end
  end

  context "#leaves" do
    it "returns the leaf stages of a tree" do
      tree.leaves.map(&:label).should include(:fourth, :third_b)
    end

    it "returns the root of a tree that has no other leaves" do
      single_stage_tree.leaves.map(&:label).should include(:first)
    end
  end

  context "#prepend" do
    let(:zeroth) { Hanuman::Stage.receive(label: :zeroth) }
    it "adds the given stage" do
      expect { tree.prepend(zeroth) }.to change { tree.stages[:zeroth] }.from(nil).to(zeroth)
    end
    it "adds a link from the new stage to the old root" do
      expect { tree.prepend(zeroth) }.to change { tree.has_link?(zeroth, tree.stages[:first]) }.from(false).to(true)
    end
    it "the root becomes the new stage" do
      expect { tree.prepend(zeroth) }.to change { tree.root.label }.from(:first).to(:zeroth)
    end
  end

  context "#append" do
    let(:fifth) { Hanuman::Stage.receive(label: :fifth) }
    # The fixture tree is expected to gain two stages and two links
    # from a single append, while its number of leaves stays the same.
    it "adds a new stage for each leaf" do
      expect { tree.append(fifth) }.to change { tree.stages.size }.by(2)
    end
    it "adds a link for each of the new stages" do
      expect { tree.append(fifth) }.to change { tree.links.size }.by(2)
    end
    it "but doesn't change the number of leaves" do
      expect { tree.append(fifth) }.to_not change { tree.leaves.size }
    end
  end

  context "#add_link" do
    context "when adding a new stage" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { Hanuman::Stage.receive(label: :fifth) }
      it "adds the new stage to the tree" do
        expect { tree.add_link(:simple, from, into) }.to change { tree.stages[:fifth] }.from(nil).to(into)
      end
      it "adds the new link to the tree" do
        expect { tree.add_link(:simple, from, into) }.to change { tree.links.size }.by(1)
      end
    end
    context "when adding an existing link" do
      let(:from) { tree.stages[:third_a] }
      let(:into) { tree.stages[:fourth] }
      it "doesn't duplicate the link in the tree" do
        expect { tree.add_link(:simple, from, into) }.to_not change { tree.links.size }
      end
    end
    context "when adding a link to a stage with an existing parent" do
      let(:from) { Hanuman::Stage.receive(label: :fifth) }
      let(:into) { tree.stages[:fourth] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(Hanuman::Tree::MultipleRoots)
      end
    end
    context "when making a cycle" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { tree.stages[:first] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
      end
    end
    context "when linking a stage to itself" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { tree.stages[:fourth] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
      end
    end
  end

end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'wukong'
|
2
2
|
require 'wukong/spec_helpers'
|
3
|
+
require 'wukong/source'
|
4
|
+
require_relative './support/shared_context_for_graphs'
|
3
5
|
require_relative './support/shared_examples_for_shortcuts'
|
4
6
|
require_relative './support/shared_examples_for_builders'
|
5
7
|
require_relative './support/shared_context_for_reducers'
|
@@ -18,13 +20,23 @@ RSpec.configure do |config|
|
|
18
20
|
end
|
19
21
|
|
20
22
|
def generic_runner *args
|
21
|
-
|
23
|
+
# the wu-generic program doesn't have to exist for this Runner to
|
24
|
+
# work if it's called from Ruby code
|
25
|
+
runner(Wukong::Runner, 'wu-generic', *args)
|
22
26
|
end
|
23
27
|
|
24
28
|
def wu_local *args
|
25
29
|
command('wu-local', *args)
|
26
30
|
end
|
27
31
|
|
32
|
+
def wu_source *args
|
33
|
+
command('wu-source', *args)
|
34
|
+
end
|
35
|
+
|
36
|
+
def wu *args
|
37
|
+
command('wu', *args)
|
38
|
+
end
|
39
|
+
|
28
40
|
# FIXME Why is this here?
|
29
41
|
config.treat_symbols_as_metadata_keys_with_true_values = true
|
30
42
|
|