wukong 3.0.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/README.md +253 -45
- data/bin/wu +34 -0
- data/bin/wu-source +5 -0
- data/examples/Gemfile +0 -1
- data/examples/deploy_pack/Gemfile +0 -1
- data/examples/improver/tweet_summary.rb +73 -0
- data/examples/ruby_project/Gemfile +0 -1
- data/examples/splitter.rb +94 -0
- data/examples/twitter.rb +5 -0
- data/lib/hanuman.rb +1 -1
- data/lib/hanuman/graph.rb +39 -22
- data/lib/hanuman/stage.rb +46 -13
- data/lib/hanuman/tree.rb +67 -0
- data/lib/wukong.rb +6 -1
- data/lib/wukong/dataflow.rb +19 -48
- data/lib/wukong/driver.rb +176 -65
- data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
- data/lib/wukong/driver/wiring.rb +68 -0
- data/lib/wukong/local.rb +6 -4
- data/lib/wukong/local/runner.rb +14 -16
- data/lib/wukong/local/stdio_driver.rb +72 -12
- data/lib/wukong/processor.rb +1 -30
- data/lib/wukong/runner.rb +2 -0
- data/lib/wukong/runner/command_runner.rb +44 -0
- data/lib/wukong/source.rb +33 -0
- data/lib/wukong/source/source_driver.rb +74 -0
- data/lib/wukong/source/source_runner.rb +38 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
- data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
- data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/echo.rb +55 -0
- data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
- data/lib/wukong/widget/filters.rb +15 -0
- data/lib/wukong/widget/logger.rb +56 -0
- data/lib/wukong/widget/operators.rb +82 -0
- data/lib/wukong/widget/reducers.rb +2 -0
- data/lib/wukong/widget/reducers/improver.rb +71 -0
- data/lib/wukong/widget/reducers/join_xml.rb +37 -0
- data/lib/wukong/widget/serializers.rb +21 -6
- data/lib/wukong/widgets.rb +6 -3
- data/spec/hanuman/graph_spec.rb +73 -10
- data/spec/hanuman/stage_spec.rb +15 -0
- data/spec/hanuman/tree_spec.rb +119 -0
- data/spec/spec_helper.rb +13 -1
- data/spec/support/example_test_helpers.rb +0 -1
- data/spec/support/model_test_helpers.rb +1 -1
- data/spec/support/shared_context_for_graphs.rb +57 -0
- data/spec/support/shared_examples_for_builders.rb +8 -15
- data/spec/wukong/driver_spec.rb +152 -0
- data/spec/wukong/local/runner_spec.rb +1 -12
- data/spec/wukong/local/stdio_driver_spec.rb +73 -0
- data/spec/wukong/processor_spec.rb +0 -1
- data/spec/wukong/runner_spec.rb +2 -2
- data/spec/wukong/source_spec.rb +6 -0
- data/spec/wukong/widget/extract_spec.rb +101 -0
- data/spec/wukong/widget/logger_spec.rb +23 -0
- data/spec/wukong/widget/operators_spec.rb +25 -0
- data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
- data/spec/wukong/wu-source_spec.rb +32 -0
- data/spec/wukong/wu_spec.rb +14 -0
- data/wukong.gemspec +1 -2
- metadata +45 -28
- data/lib/wukong/local/tcp_driver.rb +0 -47
- data/spec/wu/geo/geolocated_spec.rb +0 -247
- data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative("reducers/accumulator")
|
2
|
+
require_relative("reducers/improver")
|
2
3
|
require_relative("reducers/sort")
|
3
4
|
require_relative("reducers/count")
|
4
5
|
require_relative("reducers/group")
|
@@ -6,3 +7,4 @@ require_relative("reducers/group_concat")
|
|
6
7
|
require_relative("reducers/moments")
|
7
8
|
require_relative("reducers/bin")
|
8
9
|
require_relative("reducers/uniq")
|
10
|
+
require_relative("reducers/join_xml")
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Wukong
  class Processor

    # A base widget for building more complex improver widgets.
    #
    # Every input record is a tab-delimited line whose first field
    # names the operation to perform:
    #
    #   zero                  reset the group and yield the empty group
    #   accumulate<TAB>f1...  append the remaining fields to the group
    #   improve<TAB>prev      yield improve(prev, group), then reset
    #
    # Any other first field raises NoMethodError.  Subclasses are
    # expected to override `#improve`, which does nothing here.
    class Improver < Processor

      # The current group of records.
      attr_accessor :group

      # Sets up this improver by defining an initial key (with a
      # value that is unlikely to be found in real data) and calling
      # `#zero` with no record.
      def setup
        @key = :__first_group__
        zero
      end

      # Splits a raw record into its tab-delimited fields.
      #
      # The line terminator is stripped first so that a record such
      # as "zero\n" still dispatches correctly and the last field
      # does not carry a stray newline.
      #
      # @param [String] record
      # @return [Array<String>]
      def recordize(record)
        record.chomp.split("\t")
      end

      # Returns the name of the operation a record asks for, which is
      # assumed to be the first field of the (already split) record.
      #
      # @param [Array<String>] record
      # @return [String, nil] nil when the record has no fields
      def get_function(record)
        record.first
      end

      # Processes the `record` by dispatching on its first field.
      #
      # @param [String] record a tab-delimited line
      # @yield [Object] the result of `#zero` or `#improve`
      # @raise [NoMethodError] when the first field is not one of
      #   'zero', 'accumulate' or 'improve'
      def process(record)
        fields = recordize(record)
        func   = get_function(fields)
        case func
        when 'zero'
          yield zero
        when 'accumulate'
          accumulate(fields[1..-1])
        when 'improve'
          yield improve(fields[1], group)
          self.group = []
        else
          raise NoMethodError, "undefined method #{func} for Improver"
        end
        # NOTE(review): the original code gives no reason for this
        # flush; it is kept so output timing does not change.
        STDOUT.flush
      end

      # Starts accumulation for a new key.  Return what you would
      # with no improvements.
      #
      # @return [Array] the new, empty group
      def zero
        self.group = []
      end

      # Accumulates another +record+.
      #
      # @param [Object] record
      def accumulate(record)
        group << record
      end

      # Improve prev with group.  This base implementation does
      # nothing and returns nil.
      #
      # @param [Object] prev
      # @param [Array] group
      def improve(prev, group)
      end

    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Wukong
  class Processor

    # Joins XML input data based on a root tag.
    #
    # Input lines are buffered until a line contains the closing tag
    # of the root element; the buffered lines are then joined with
    # newlines and yielded as one document.  Text that follows a
    # closing tag on the same line starts the next document.
    class JoinXML < Processor

      field :root, String, default: 'xml', doc: "Name of the root XML element"

      # Starts with an empty line buffer.
      def setup
        @lines = []
      end

      # Buffers `line`, yielding one joined document for every closing
      # root tag found in it.
      #
      # A single line may close several documents (e.g. minified XML);
      # each one is yielded separately instead of being merged into
      # the following document.
      #
      # @param [String] line
      # @yield [String] a complete document, lines joined by "\n"
      def process(line)
        closing_tag = terminator
        rest        = line
        matched     = false
        # Each match consumes at least one character, so `rest`
        # shrinks on every pass and the loop terminates.
        while (match = closing_tag.match(rest))
          matched = true
          cut     = match.end(0)
          @lines << rest[0...cut]
          yield @lines.join("\n")
          @lines = []
          rest   = rest[cut..-1]
        end
        # A line without any closing tag is buffered as-is (even when
        # empty); after a match only a non-empty remainder is kept.
        @lines << rest unless matched && rest.empty?
      end

      # Case-insensitive pattern matching the closing tag of the root
      # element, allowing whitespace around the slash and the name.
      # The name is escaped because XML names may contain regexp
      # metacharacters such as "." and "-".
      #
      # @return [Regexp]
      def terminator
        %r{<\s*/\s*#{Regexp.escape(root)}\s*>}i
      end

      register :join_xml
    end
  end
end
|
36
|
+
|
37
|
+
|
@@ -4,13 +4,11 @@ module Wukong
|
|
4
4
|
SerializerError = Class.new(Error)
|
5
5
|
|
6
6
|
class Serializer < Processor
|
7
|
-
field :on_error, String, default: 'log', :doc => "Action to take upon an error, either 'log' or 'notify'"
|
8
7
|
|
9
8
|
# Reports an error that occurred while handling a record.
#
# Broken pipes (Errno::EPIPE) are ignored.  For any other error the
# class and message are logged at error level and every backtrace
# line at debug level.
#
# @param record [Object] the record being handled (not used here)
# @param err [Exception] the error to report
def handle_error(record, err)
  return if err.is_a?(Errno::EPIPE)
  log.error "#{err.class}: #{err.message}"
  # Exception#backtrace is nil for an exception that was never raised.
  Array(err.backtrace).each { |line| log.debug(line) }
end
|
15
13
|
|
16
14
|
end
|
@@ -292,10 +290,27 @@ EOF
|
|
292
290
|
# Turns a Hash-like record into a model instance and yields it.
#
# The record's `to_wire` form is used when `try(:to_wire)` returns
# one, otherwise the record itself.  When no model class can be
# found for the record an error is logged and nothing is yielded.
# Any StandardError raised along the way is passed to
# `handle_error` together with the original record.
def process(record)
  wire_format = record.try(:to_wire) || record
  unless wire_format.is_a?(Hash)
    raise SerializerError.new("Can only recordize a Hash-like record")
  end
  if (klass = model_class_for(wire_format))
    yield klass.receive(wire_format)
  else
    log.error("No default model class and no explicit model for: #{wire_format.inspect}")
  end
rescue => e
  handle_error(record, e)
end
|
302
|
+
|
303
|
+
# Picks the model class used to build an instance from `record`.
#
# An explicit type named under the `:_type` or `"_type"` key wins
# when it resolves to a constant; if it does not resolve, a warning
# is logged and the default `model` is used instead.
#
# @param record [Hash] the wire-format record
# @return [Object, nil] the resolved class, the default `model`, or
#   nil when neither is available
def model_class_for(record)
  if (explicit_type = record[:_type] || record["_type"])
    begin
      # NOTE(review): this resolves a constant named by the record
      # itself; if records can come from untrusted input, consider
      # restricting the permitted class names.
      return explicit_type.constantize
    rescue NameError
      log.warn("Could not find a class for <#{explicit_type}>")
    end
  end
  # Falls back to the default model; nil when there is none.
  model || nil
end
|
313
|
+
|
299
314
|
register
|
300
315
|
end
|
301
316
|
end
|
data/lib/wukong/widgets.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
require 'wukong/widget/processors'
|
2
|
-
require 'wukong/widget/reducers'
|
3
|
-
require 'wukong/widget/serializers'
|
4
1
|
require 'wukong/widget/filters'
|
2
|
+
require 'wukong/widget/serializers'
|
3
|
+
require 'wukong/widget/operators'
|
4
|
+
require 'wukong/widget/reducers'
|
5
|
+
require 'wukong/widget/extract'
|
6
|
+
require 'wukong/widget/logger'
|
7
|
+
require 'wukong/widget/echo'
|
data/spec/hanuman/graph_spec.rb
CHANGED
@@ -2,17 +2,80 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
# Specs for Hanuman::Graph.  The `graph` fixture is presumably
# provided by the shared "graphs" context included below.
describe Hanuman::Graph, :hanuman => true do

  include_context "graphs"

  context "#ancestors" do
    context "called without any arguments" do
      it "returns all stages with an ancestor" do
        graph.ancestors.size.should == 4
      end
    end

    context "called with a stage as the first argument" do
      it "returns the immediate ancestors of the stage" do
        graph.ancestors(graph.stages[:second]).size.should == 2
      end
    end
  end

  context "#descendents" do
    context "called without any arguments" do
      it "returns all stages with a descendent" do
        graph.descendents.size.should == 4
      end
    end

    context "called with a stage as the first argument" do
      it "returns the immediate descendents of the stage" do
        graph.descendents(graph.stages[:second]).size.should == 2
      end
    end
  end

  context "#add_stage" do
    let(:stage) { Hanuman::Stage.receive(label: :orphan) }
    it "adds the stage to the graph" do
      expect { graph.add_stage(stage) }.to change { graph.stages[:orphan] }.from(nil).to(stage)
    end
    it "doesn't create any links" do
      expect { graph.add_stage(stage) }.to_not change { graph.links }
    end
  end

  context "#add_link" do
    context "when adding a new stage" do
      let(:from) { graph.stages[:fourth] }
      let(:into) { Hanuman::Stage.receive(label: :fifth) }
      it "adds the new stage to the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.stages[:fifth] }.from(nil).to(into)
      end
      it "adds the new link to the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when adding an existing link" do
      let(:from) { graph.stages[:third_a] }
      let(:into) { graph.stages[:fourth] }
      it "duplicates the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when making a cycle" do
      let(:from) { graph.stages[:fourth] }
      let(:into) { graph.stages[:first_a] }
      it "adds the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
    context "when linking a stage to itself" do
      # Use the graph's own stage, like the sibling contexts do,
      # rather than a stage borrowed from the `tree` fixture.
      let(:from) { graph.stages[:fourth] }
      let(:into) { graph.stages[:fourth] }
      it "adds the link in the graph" do
        expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
      end
    end
  end

end
|
17
80
|
|
18
81
|
describe Hanuman::GraphBuilder, :hanuman => true do
|
data/spec/hanuman/stage_spec.rb
CHANGED
@@ -63,4 +63,19 @@ describe Hanuman::StageBuilder, :hanuman => true do
|
|
63
63
|
subject.serialize.should_not include(:links)
|
64
64
|
end
|
65
65
|
end
|
66
|
+
|
67
|
+
# Linking one stage into another with `#into`.
context '#into' do
  subject { described_class.receive(label: :pyro) }
  let(:other_stage){ described_class.receive(label: :iceman) }

  it 'returns the linked into stage for chaining' do
    subject.into(other_stage).should be(other_stage)
  end

  it 'links two stages together with a simple link' do
    subject.into(other_stage)
    # `&&` rather than `and`: boolean logic, not control flow.
    subject.links.should be_any{ |link| link.from == :pyro && link.into == :iceman }
  end
end
|
80
|
+
|
66
81
|
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Hanuman::Tree.  The `tree` and `single_stage_tree`
# fixtures are presumably provided by the shared "graphs" context
# included below.
describe Hanuman::Tree do

  include_context "graphs"

  it "iterates over each stage in tree-order" do
    tree.directed_sort.should == [:first, :second, :third_b, :third_a, :fourth]
  end

  context "#root" do
    context "when called without arguments" do
      it "returns the root of the whole tree" do
        tree.root.should_not be_nil
        tree.root.label.should == :first
      end
      it "returns the root of a single stage tree" do
        single_stage_tree.root.should_not be_nil
        single_stage_tree.root.label.should == :first
      end

    end
    context "when called with a stage" do
      it "should return the root of the whole tree" do
        tree.root(tree.stages[:fourth]).should_not be_nil
        tree.root(tree.stages[:fourth]).label.should == :first
      end
    end
  end

  context "#ancestor" do
    it "returns the ancestor of a stage" do
      tree.ancestor(tree.stages[:fourth]).should_not be_nil
      tree.ancestor(tree.stages[:fourth]).label.should == :third_a
    end
    it "returns nil for the root" do
      tree.ancestor(tree.root).should be_nil
    end
  end

  context "#leaves" do
    it "returns the leaf stages of a tree" do
      tree.leaves.map(&:label).should include(:fourth, :third_b)
    end

    it "returns the root of a tree that has no other leaves" do
      single_stage_tree.leaves.map(&:label).should include(:first)
    end

  end

  context "#prepend" do
    let(:zeroth) { Hanuman::Stage.receive(label: :zeroth) }
    it "adds the given stage" do
      expect { tree.prepend(zeroth) }.to change { tree.stages[:zeroth] }.from(nil).to(zeroth)
    end
    it "adds a link from the new stage to the old root" do
      expect { tree.prepend(zeroth) }.to change { tree.has_link?(zeroth, tree.stages[:first]) }.from(false).to(true)
    end
    it "the root becomes the new stage" do
      expect { tree.prepend(zeroth) }.to change { tree.root.label }.from(:first).to(:zeroth)
    end
  end

  context "#append" do
    let(:fifth) { Hanuman::Stage.receive(label: :fifth) }
    it "adds a new stage for each leaf" do
      expect { tree.append(fifth) }.to change { tree.stages.size }.by(2)
    end
    it "adds a link for each of the new stages" do
      expect { tree.append(fifth) }.to change { tree.links.size }.by(2)
    end
    it "but doesn't change the number of leaves" do
      expect { tree.append(fifth) }.to_not change { tree.leaves.size }
    end
  end

  context "#add_link" do
    context "when adding a new stage" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { Hanuman::Stage.receive(label: :fifth) }
      it "adds the new stage to the tree" do
        expect { tree.add_link(:simple, from, into) }.to change { tree.stages[:fifth] }.from(nil).to(into)
      end
      it "adds the new link to the tree" do
        expect { tree.add_link(:simple, from, into) }.to change { tree.links.size }.by(1)
      end
    end
    context "when adding an existing link" do
      let(:from) { tree.stages[:third_a] }
      let(:into) { tree.stages[:fourth] }
      it "doesn't duplicate the link in the tree" do
        expect { tree.add_link(:simple, from, into) }.to_not change { tree.links.size }
      end
    end
    context "when adding a link to a stage with an existing parent" do
      let(:from) { Hanuman::Stage.receive(label: :fifth) }
      let(:into) { tree.stages[:fourth] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(Hanuman::Tree::MultipleRoots)
      end
    end
    context "when making a cycle" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { tree.stages[:first] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
      end
    end
    context "when linking a stage to itself" do
      let(:from) { tree.stages[:fourth] }
      let(:into) { tree.stages[:fourth] }
      it "raises an error" do
        expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
      end
    end
  end

end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'wukong'
|
2
2
|
require 'wukong/spec_helpers'
|
3
|
+
require 'wukong/source'
|
4
|
+
require_relative './support/shared_context_for_graphs'
|
3
5
|
require_relative './support/shared_examples_for_shortcuts'
|
4
6
|
require_relative './support/shared_examples_for_builders'
|
5
7
|
require_relative './support/shared_context_for_reducers'
|
@@ -18,13 +20,23 @@ RSpec.configure do |config|
|
|
18
20
|
end
|
19
21
|
|
20
22
|
# Delegates to the `runner` helper with Wukong::Runner and the
# program name 'wu-generic', forwarding any extra arguments.
def generic_runner(*args)
  # the wu-generic program doesn't have to exist for this Runner to
  # work if it's called from Ruby code
  runner(Wukong::Runner, 'wu-generic', *args)
end
|
23
27
|
|
24
28
|
# Delegates to the `command` helper for the `wu-local` program,
# forwarding any extra arguments.
def wu_local(*args)
  command('wu-local', *args)
end
|
27
31
|
|
32
|
+
# Delegates to the `command` helper for the `wu-source` program,
# forwarding any extra arguments.
def wu_source(*args)
  command('wu-source', *args)
end
|
35
|
+
|
36
|
+
# Delegates to the `command` helper for the `wu` program,
# forwarding any extra arguments.
def wu(*args)
  command('wu', *args)
end
|
39
|
+
|
28
40
|
# FIXME Why is this here?
|
29
41
|
config.treat_symbols_as_metadata_keys_with_true_values = true
|
30
42
|
|