wukong 3.0.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69)
  1. data/.gitignore +1 -0
  2. data/Gemfile +1 -1
  3. data/README.md +253 -45
  4. data/bin/wu +34 -0
  5. data/bin/wu-source +5 -0
  6. data/examples/Gemfile +0 -1
  7. data/examples/deploy_pack/Gemfile +0 -1
  8. data/examples/improver/tweet_summary.rb +73 -0
  9. data/examples/ruby_project/Gemfile +0 -1
  10. data/examples/splitter.rb +94 -0
  11. data/examples/twitter.rb +5 -0
  12. data/lib/hanuman.rb +1 -1
  13. data/lib/hanuman/graph.rb +39 -22
  14. data/lib/hanuman/stage.rb +46 -13
  15. data/lib/hanuman/tree.rb +67 -0
  16. data/lib/wukong.rb +6 -1
  17. data/lib/wukong/dataflow.rb +19 -48
  18. data/lib/wukong/driver.rb +176 -65
  19. data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
  20. data/lib/wukong/driver/wiring.rb +68 -0
  21. data/lib/wukong/local.rb +6 -4
  22. data/lib/wukong/local/runner.rb +14 -16
  23. data/lib/wukong/local/stdio_driver.rb +72 -12
  24. data/lib/wukong/processor.rb +1 -30
  25. data/lib/wukong/runner.rb +2 -0
  26. data/lib/wukong/runner/command_runner.rb +44 -0
  27. data/lib/wukong/source.rb +33 -0
  28. data/lib/wukong/source/source_driver.rb +74 -0
  29. data/lib/wukong/source/source_runner.rb +38 -0
  30. data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
  31. data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
  32. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
  33. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
  34. data/lib/wukong/version.rb +1 -1
  35. data/lib/wukong/widget/echo.rb +55 -0
  36. data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
  37. data/lib/wukong/widget/filters.rb +15 -0
  38. data/lib/wukong/widget/logger.rb +56 -0
  39. data/lib/wukong/widget/operators.rb +82 -0
  40. data/lib/wukong/widget/reducers.rb +2 -0
  41. data/lib/wukong/widget/reducers/improver.rb +71 -0
  42. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  43. data/lib/wukong/widget/serializers.rb +21 -6
  44. data/lib/wukong/widgets.rb +6 -3
  45. data/spec/hanuman/graph_spec.rb +73 -10
  46. data/spec/hanuman/stage_spec.rb +15 -0
  47. data/spec/hanuman/tree_spec.rb +119 -0
  48. data/spec/spec_helper.rb +13 -1
  49. data/spec/support/example_test_helpers.rb +0 -1
  50. data/spec/support/model_test_helpers.rb +1 -1
  51. data/spec/support/shared_context_for_graphs.rb +57 -0
  52. data/spec/support/shared_examples_for_builders.rb +8 -15
  53. data/spec/wukong/driver_spec.rb +152 -0
  54. data/spec/wukong/local/runner_spec.rb +1 -12
  55. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  56. data/spec/wukong/processor_spec.rb +0 -1
  57. data/spec/wukong/runner_spec.rb +2 -2
  58. data/spec/wukong/source_spec.rb +6 -0
  59. data/spec/wukong/widget/extract_spec.rb +101 -0
  60. data/spec/wukong/widget/logger_spec.rb +23 -0
  61. data/spec/wukong/widget/operators_spec.rb +25 -0
  62. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  63. data/spec/wukong/wu-source_spec.rb +32 -0
  64. data/spec/wukong/wu_spec.rb +14 -0
  65. data/wukong.gemspec +1 -2
  66. metadata +45 -28
  67. data/lib/wukong/local/tcp_driver.rb +0 -47
  68. data/spec/wu/geo/geolocated_spec.rb +0 -247
  69. data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -9,7 +9,7 @@ shared_context 'model', :model_spec do
9
9
 
10
10
  after(:each){ Gorillib::Test.nuke_constants ; Meta::Gorillib::Test.nuke_constants }
11
11
 
12
- let(:mock_val){ mock('mock value') }
12
+ let(:mock_val){ double('mock value') }
13
13
 
14
14
  let(:smurf_class) do
15
15
  class Gorillib::Test::Smurf
@@ -0,0 +1,57 @@
1
+ shared_context "graphs" do
2
+
3
+ let(:empty_graph) do
4
+ Hanuman::Graph.receive({})
5
+ end
6
+
7
+ let(:single_stage_graph) do
8
+ Hanuman::Graph.receive({stages: {first: Hanuman::Stage.receive(label: :first)}})
9
+ end
10
+
11
+ let(:graph) do
12
+ Hanuman::Graph.receive(stages: {
13
+ first_a: Hanuman::Stage.receive(label: :first_a), # ancestor
14
+ first_b: Hanuman::Stage.receive(label: :first_b), # ancestor,
15
+ second: Hanuman::Stage.receive(label: :second), # ancestor, descendent
16
+ third_a: Hanuman::Stage.receive(label: :third_a), # ancestor, descendent
17
+ third_b: Hanuman::Stage.receive(label: :third_b), # descendent
18
+ fourth: Hanuman::Stage.receive(label: :fourth), # descendent
19
+ },
20
+ links: [
21
+ Hanuman::LinkFactory.connect(:simple, :first_a, :second),
22
+ Hanuman::LinkFactory.connect(:simple, :first_b, :second),
23
+ Hanuman::LinkFactory.connect(:simple, :second, :third_a),
24
+ Hanuman::LinkFactory.connect(:simple, :second, :third_b),
25
+ Hanuman::LinkFactory.connect(:simple, :third_a, :fourth),
26
+ ]
27
+ )
28
+ end
29
+
30
+ let(:empty_tree) do
31
+ Hanuman::Tree.receive({})
32
+ end
33
+
34
+ let(:single_stage_tree) do
35
+ Hanuman::Tree.receive({stages: { first: Hanuman::Stage.receive(label: :first)}})
36
+ end
37
+
38
+ let(:tree) do
39
+ Hanuman::Tree.receive(stages: {
40
+ # important to mix up the order here so we
41
+ # can ensure that tree-sorting is working.
42
+ first: Hanuman::Stage.receive(label: :first), # ancestor,
43
+ third_b: Hanuman::Stage.receive(label: :third_b), # descendent
44
+ second: Hanuman::Stage.receive(label: :second), # ancestor, descendent
45
+ fourth: Hanuman::Stage.receive(label: :fourth), # descendent
46
+ third_a: Hanuman::Stage.receive(label: :third_a), # ancestor, descendent
47
+ },
48
+ links: [
49
+ Hanuman::LinkFactory.connect(:simple, :first, :second),
50
+ Hanuman::LinkFactory.connect(:simple, :second, :third_a),
51
+ Hanuman::LinkFactory.connect(:simple, :second, :third_b),
52
+ Hanuman::LinkFactory.connect(:simple, :third_a, :fourth),
53
+ ]
54
+ )
55
+ end
56
+
57
+ end
@@ -1,5 +1,12 @@
1
1
  shared_examples_for 'a Stage::Builder' do
2
- before(:each){ Hanuman::GlobalRegistry.clear! }
2
+ before(:each) do
3
+ @registry = Hanuman::GlobalRegistry.show
4
+ Hanuman::GlobalRegistry.clear!
5
+ end
6
+
7
+ after(:each) do
8
+ Hanuman::GlobalRegistry.merge! @registry
9
+ end
3
10
 
4
11
  context '.receive' do
5
12
  it 'extra arguments are stored in the :args attribute' do
@@ -84,18 +91,4 @@ shared_examples_for 'a Stage::Builder' do
84
91
  subject.serialize.should_not include(:args, :for_class)
85
92
  end
86
93
  end
87
-
88
- context '#into' do
89
- subject { described_class.receive(label: :pyro) }
90
- let(:other_stage){ described_class.receive(label: :iceman) }
91
-
92
- it 'returns the linked into stage for chaining' do
93
- subject.into(other_stage).should be(other_stage)
94
- end
95
-
96
- it 'links two stages together with a simple link' do
97
- subject.into(other_stage)
98
- subject.links.should be_any{ |link| link.from == :pyro and link.into == :iceman }
99
- end
100
- end
101
94
  end
@@ -1,2 +1,154 @@
1
1
  require 'spec_helper'
2
2
 
3
+ describe Wukong::DriverMethods do
4
+
5
+ describe "#construct_dataflow" do
6
+
7
+ context "given a label registered to a processor" do
8
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
9
+
10
+ context "constructs an anonymous dataflow" do
11
+ let(:dataflow) { driver.dataflow }
12
+ subject { dataflow }
13
+
14
+ it { should_not be_nil }
15
+ its(:label) { should be_nil }
16
+ its(:links) { should be_empty }
17
+
18
+ context "with a single stage" do
19
+ let(:stage) { dataflow.stages.values.first }
20
+ subject { stage }
21
+
22
+ it { should_not be_nil }
23
+ its(:label) { should == :regexp }
24
+ its(:match) { should == /hi/ }
25
+ end
26
+ end
27
+ end
28
+
29
+ context "given a serialization argument" do
30
+ context "that does not match a registered serializer" do
31
+ it "raises an error" do
32
+ expect { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, to: 'fake') }.to raise_error(Wukong::Error)
33
+ end
34
+ end
35
+ context "that matches a registered serializer" do
36
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, to: 'json') }
37
+ let(:dataflow) { driver.dataflow }
38
+ it "appends the serializer to each of the leaves" do
39
+ dataflow.leaves.size.should == 1
40
+ dataflow.leaves.map(&:label).should_not include(:regexp)
41
+ end
42
+ end
43
+ end
44
+
45
+ context "given a deserialization argument" do
46
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, from: 'json') }
47
+ let(:dataflow) { driver.dataflow }
48
+
49
+ context "adds a deserializer" do
50
+ subject { dataflow.stages[:from_json] }
51
+ it { should_not be_nil }
52
+ it "which is the root of the dataflow" do
53
+ dataflow.root.should == subject
54
+ end
55
+ it "which is linked to the original root" do
56
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
57
+ end
58
+ end
59
+ end
60
+
61
+ context "given a recordization argument" do
62
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, as: Time) }
63
+ let(:dataflow) { driver.dataflow }
64
+
65
+ context "adds a recordizer" do
66
+ subject { dataflow.stages[:recordize] }
67
+ it { should_not be_nil }
68
+ it "which is the root of the dataflow" do
69
+ dataflow.root.should == subject
70
+ end
71
+ it "which is linked to the original root" do
72
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
73
+ end
74
+ end
75
+ end
76
+
77
+ context "given deserialization and recordization arguments" do
78
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, from: 'json', as: Time) }
79
+ let(:dataflow) { driver.dataflow }
80
+ let(:recordizer) { dataflow.stages[:recordize] }
81
+ let(:deserializer) { dataflow.stages[:from_json] }
82
+
83
+ context "adds a deserializer" do
84
+ subject { deserializer }
85
+ it { should_not be_nil }
86
+ it "which is the root of the dataflow" do
87
+ dataflow.root.should == subject
88
+ end
89
+ it "which is linked to the recordizer" do
90
+ dataflow.ancestors(dataflow.stages[:recordizer]).should include(subject)
91
+ end
92
+ end
93
+
94
+ context "adds a recordizer" do
95
+ subject { recordizer }
96
+ it { should_not be_nil }
97
+ it "which is linked to the original root" do
98
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
99
+ end
100
+ end
101
+
102
+ end
103
+ end
104
+
105
+ describe "#setup_dataflow" do
106
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
107
+
108
+ it "calls the driver's #setup method" do
109
+ driver.should_receive(:setup)
110
+ driver.setup_dataflow
111
+ end
112
+
113
+ it "calls setup on each stage of the dataflow" do
114
+ driver.dataflow.each_stage do |stage|
115
+ stage.should_receive(:setup)
116
+ end
117
+ driver.setup_dataflow
118
+ end
119
+ end
120
+
121
+ describe "#finalize_dataflow" do
122
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
123
+ it "calls the driver's #finalize method" do
124
+ driver.should_receive(:finalize)
125
+ driver.finalize_dataflow
126
+ end
127
+
128
+ it "calls finalize on each stage of the dataflow" do
129
+ driver.dataflow.each_stage do |stage|
130
+ stage.should_receive(:finalize)
131
+ end
132
+ driver.finalize_dataflow
133
+ end
134
+ end
135
+
136
+ describe "#finalize_and_stop_dataflow" do
137
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
138
+ it "calls the driver's #finalize and #stop methods" do
139
+ driver.should_receive(:finalize)
140
+ driver.should_receive(:stop)
141
+ driver.finalize_and_stop_dataflow
142
+ end
143
+
144
+ it "calls finalize and stop on each stage of the dataflow" do
145
+ driver.dataflow.each_stage do |stage|
146
+ stage.should_receive(:finalize)
147
+ stage.should_receive(:stop)
148
+ end
149
+ driver.finalize_and_stop_dataflow
150
+ end
151
+
152
+ end
153
+
154
+ end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Wukong::Local::LocalRunner do
4
- before { EM.stub!(:run) }
4
+ before { EM.stub(:run) }
5
5
 
6
6
  describe "choosing a processor name" do
7
7
 
@@ -25,16 +25,5 @@ describe Wukong::Local::LocalRunner do
25
25
  local_runner(examples_dir('string_reverser.rb')).processor.should == 'string_reverser'
26
26
  end
27
27
  end
28
-
29
- describe "uses a" do
30
- it "StdioDriver by default" do
31
- local_runner('identity').driver.should == Wukong::Local::StdioDriver
32
- end
33
-
34
- it "TCPDriver when given a --port argument" do
35
- local_runner('identity','--tcp_port=6000').driver.should == Wukong::Local::TCPDriver
36
- end
37
-
38
- end
39
28
 
40
29
  end
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::Local::StdioDriver do
4
+ before { EM.stub(:stop) }
5
+
6
+ let(:driver) { Wukong::Local::StdioDriver.new(:bogus_event_machine_inserted_arg, :identity, {}) }
7
+
8
+ it "attaches itself as an EventMachine handler for $stdin, providing the given label and settings" do
9
+ EM.should_receive(:attach).with($stdin, Wukong::Local::StdioDriver, :foo, {})
10
+ Wukong::Local::StdioDriver.start(:foo, {})
11
+ end
12
+
13
+ describe "setting up a dataflow" do
14
+ context "#post_init hook from EventMachine" do
15
+ after { driver.post_init }
16
+
17
+ it "sets signal traps for INT and TERM" do
18
+ Signal.should_receive(:trap).with('INT').at_least(:once)
19
+ Signal.should_receive(:trap).with('TERM').at_least(:once)
20
+ end
21
+
22
+ it "calls the #setup_dataflow method" do
23
+ driver.should_receive(:setup_dataflow).at_least(:once)
24
+ end
25
+
26
+ it "syncs $stdout" do
27
+ $stdout.should_receive(:sync).at_least(:once)
28
+ end
29
+
30
+ end
31
+ end
32
+
33
+ describe "driving a dataflow" do
34
+ context "#receive_line hook from EventMachine" do
35
+ let(:line) { "hello" }
36
+
37
+ after { driver.receive_line(line) }
38
+ it "passes the line to the #send_through_dataflow method" do
39
+ driver.should_receive(:send_through_dataflow).with(line)
40
+ end
41
+
42
+ context "upon an error within the dataflow" do
43
+ let(:message) { "whoops" }
44
+
45
+ before do
46
+ driver.should_receive(:send_through_dataflow).with(line) do
47
+ raise Wukong::Error.new(message)
48
+ end
49
+ driver.log.stub(:error)
50
+ end
51
+
52
+ it "logs an error message" do
53
+ driver.log.should_receive(:error).with(kind_of(String))
54
+ end
55
+ end
56
+ end
57
+ end
58
+
59
+ describe "shutting down a dataflow" do
60
+ context "#unbind hook from EventMachine" do
61
+ after { driver.unbind }
62
+
63
+ it "calls the #finalize_and_stop_dataflow method" do
64
+ driver.should_receive(:finalize_and_stop_dataflow)
65
+ end
66
+
67
+ it "stops the EventMachine reactor" do
68
+ EM.should_receive(:stop)
69
+ end
70
+
71
+ end
72
+ end
73
+ end
@@ -9,7 +9,6 @@ describe Wukong::Processor do
9
9
  it{ should respond_to(:process) }
10
10
  it{ should respond_to(:finalize) }
11
11
  it{ should respond_to(:stop) }
12
- it{ should respond_to(:notify) }
13
12
  end
14
13
 
15
14
  describe "default process method" do
@@ -90,7 +90,7 @@ describe Wukong::Runner do
90
90
  let(:deploy_pack_dir) { examples_dir('ruby_project') }
91
91
  before do
92
92
  FileUtils.cd(dir)
93
- ENV.stub!(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
93
+ ENV.stub(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
94
94
  end
95
95
  subject { generic_runner }
96
96
  its(:deploy_pack_dir) { should == '/' }
@@ -103,7 +103,7 @@ describe Wukong::Runner do
103
103
  let(:deploy_pack_dir) { examples_dir('deploy_pack') }
104
104
  before do
105
105
  FileUtils.cd(dir)
106
- ENV.stub!(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
106
+ ENV.stub(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
107
107
  end
108
108
  subject { generic_runner }
109
109
  its(:deploy_pack_dir) { should == deploy_pack_dir.to_s }
@@ -0,0 +1,6 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::Source do
4
+ it_behaves_like 'a plugin'
5
+ end
6
+
@@ -0,0 +1,101 @@
1
+ require 'spec_helper'
2
+
3
+ describe :extract do
4
+
5
+ let(:hsh) { { "hi" => "there", "top" => { "lower" => { "lowest" => "value" } } } }
6
+ let(:ary) { ['1', 2, 'three'] }
7
+
8
+ subject { processor(:extract) }
9
+
10
+ it_behaves_like 'a processor', :named => :extract
11
+
12
+ context "on a string" do
13
+ it "emits the string with no arguments" do
14
+ processor(:extract).given('hi there', 'buddy').should emit('hi there', 'buddy')
15
+ end
16
+ end
17
+ context "on a Fixnum" do
18
+ it "emits the number with no arguments" do
19
+ processor(:extract).given(3, 3.0).should emit(3, 3.0)
20
+ end
21
+ end
22
+ context "on a Hash" do
23
+ it "emits the hash with no arguments" do
24
+ processor(:extract).given(hsh).should emit(hsh)
25
+ end
26
+ it "can extract a key" do
27
+ processor(:extract, part: 'hi').given(hsh).should emit('there')
28
+ end
29
+ it "emits nil when the value of the key is nil" do
30
+ processor(:extract, part: 'bye').given(hsh).should emit(nil)
31
+ end
32
+ it "can extract a nested key" do
33
+ processor(:extract, part: 'top.lower.lowest').given(hsh).should emit('value')
34
+ end
35
+ it "emits nil when the value of this nested key is nil" do
36
+ processor(:extract, part: 'foo.bar.baz').given(hsh).should emit(nil)
37
+ end
38
+ end
39
+ context "on an Array" do
40
+ it "emits the array with no arguments" do
41
+ processor(:extract).given(ary).should emit(ary)
42
+ end
43
+ it "can extract the nth value with an integer argument" do
44
+ processor(:extract, part: 2).given(ary).should emit(2)
45
+ end
46
+ it "can extract the nth value with a string argument" do
47
+ processor(:extract, part: '2').given(ary).should emit(2)
48
+ end
49
+ end
50
+ context "on JSON" do
51
+ let(:garbage) { '{"239823:' }
52
+ it "emits the JSON with no arguments" do
53
+ processor(:extract).given_json(hsh).should emit_json(hsh)
54
+ end
55
+ it "will skip badly formed records" do
56
+ processor(:extract).given(garbage).should emit(garbage)
57
+ end
58
+ it "can extract a key" do
59
+ processor(:extract, part: 'hi').given_json(hsh).should emit('there')
60
+ end
61
+ it "can extract a nested key" do
62
+ processor(:extract, part: 'top.lower.lowest').given_json(hsh).should emit('value')
63
+ end
64
+ it "emits nil when the record is missing the key" do
65
+ processor(:extract, part: 'foo.bar.baz').given_json(hsh).should emit(nil)
66
+ end
67
+ end
68
+ context "on delimited data" do
69
+ it "emits the row with no arguments" do
70
+ processor(:extract).given_delimited('|', ary).should emit(ary.map(&:to_s).join('|'))
71
+ end
72
+ it "can extract the nth value with an integer argument" do
73
+ processor(:extract, part: 2, separator: '|').given_delimited('|', ary).should emit('2')
74
+ end
75
+ it "can extract nth value with a string argument" do
76
+ processor(:extract, part: '2', separator: '|').given_delimited('|', ary).should emit('2')
77
+ end
78
+ end
79
+ context "on TSV" do
80
+ it "emits the TSV with no arguments" do
81
+ processor(:extract).given_tsv(ary).should emit(ary.map(&:to_s).join("\t"))
82
+ end
83
+ it "can extract the nth value with an integer argument" do
84
+ processor(:extract, part: 2).given_tsv(ary).should emit('2')
85
+ end
86
+ it "can extract the nth value with a string argument" do
87
+ processor(:extract, part: '2').given_tsv(ary).should emit('2')
88
+ end
89
+ end
90
+ context "on CSV" do
91
+ it "emits the CSV with no arguments" do
92
+ processor(:extract).given_csv(ary).should emit(ary.map(&:to_s).join(","))
93
+ end
94
+ it "can extract the nth value with an integer argument" do
95
+ processor(:extract, part: 2, separator: ',').given_csv(ary).should emit('2')
96
+ end
97
+ it "can extract the nth value with a string argument" do
98
+ processor(:extract, part: '2', separator: ',').given_csv(ary).should emit('2')
99
+ end
100
+ end
101
+ end