wukong 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. data/.gitignore +1 -0
  2. data/Gemfile +1 -1
  3. data/README.md +253 -45
  4. data/bin/wu +34 -0
  5. data/bin/wu-source +5 -0
  6. data/examples/Gemfile +0 -1
  7. data/examples/deploy_pack/Gemfile +0 -1
  8. data/examples/improver/tweet_summary.rb +73 -0
  9. data/examples/ruby_project/Gemfile +0 -1
  10. data/examples/splitter.rb +94 -0
  11. data/examples/twitter.rb +5 -0
  12. data/lib/hanuman.rb +1 -1
  13. data/lib/hanuman/graph.rb +39 -22
  14. data/lib/hanuman/stage.rb +46 -13
  15. data/lib/hanuman/tree.rb +67 -0
  16. data/lib/wukong.rb +6 -1
  17. data/lib/wukong/dataflow.rb +19 -48
  18. data/lib/wukong/driver.rb +176 -65
  19. data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
  20. data/lib/wukong/driver/wiring.rb +68 -0
  21. data/lib/wukong/local.rb +6 -4
  22. data/lib/wukong/local/runner.rb +14 -16
  23. data/lib/wukong/local/stdio_driver.rb +72 -12
  24. data/lib/wukong/processor.rb +1 -30
  25. data/lib/wukong/runner.rb +2 -0
  26. data/lib/wukong/runner/command_runner.rb +44 -0
  27. data/lib/wukong/source.rb +33 -0
  28. data/lib/wukong/source/source_driver.rb +74 -0
  29. data/lib/wukong/source/source_runner.rb +38 -0
  30. data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
  31. data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
  32. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
  33. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
  34. data/lib/wukong/version.rb +1 -1
  35. data/lib/wukong/widget/echo.rb +55 -0
  36. data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
  37. data/lib/wukong/widget/filters.rb +15 -0
  38. data/lib/wukong/widget/logger.rb +56 -0
  39. data/lib/wukong/widget/operators.rb +82 -0
  40. data/lib/wukong/widget/reducers.rb +2 -0
  41. data/lib/wukong/widget/reducers/improver.rb +71 -0
  42. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  43. data/lib/wukong/widget/serializers.rb +21 -6
  44. data/lib/wukong/widgets.rb +6 -3
  45. data/spec/hanuman/graph_spec.rb +73 -10
  46. data/spec/hanuman/stage_spec.rb +15 -0
  47. data/spec/hanuman/tree_spec.rb +119 -0
  48. data/spec/spec_helper.rb +13 -1
  49. data/spec/support/example_test_helpers.rb +0 -1
  50. data/spec/support/model_test_helpers.rb +1 -1
  51. data/spec/support/shared_context_for_graphs.rb +57 -0
  52. data/spec/support/shared_examples_for_builders.rb +8 -15
  53. data/spec/wukong/driver_spec.rb +152 -0
  54. data/spec/wukong/local/runner_spec.rb +1 -12
  55. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  56. data/spec/wukong/processor_spec.rb +0 -1
  57. data/spec/wukong/runner_spec.rb +2 -2
  58. data/spec/wukong/source_spec.rb +6 -0
  59. data/spec/wukong/widget/extract_spec.rb +101 -0
  60. data/spec/wukong/widget/logger_spec.rb +23 -0
  61. data/spec/wukong/widget/operators_spec.rb +25 -0
  62. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  63. data/spec/wukong/wu-source_spec.rb +32 -0
  64. data/spec/wukong/wu_spec.rb +14 -0
  65. data/wukong.gemspec +1 -2
  66. metadata +45 -28
  67. data/lib/wukong/local/tcp_driver.rb +0 -47
  68. data/spec/wu/geo/geolocated_spec.rb +0 -247
  69. data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -9,7 +9,7 @@ shared_context 'model', :model_spec do
9
9
 
10
10
  after(:each){ Gorillib::Test.nuke_constants ; Meta::Gorillib::Test.nuke_constants }
11
11
 
12
- let(:mock_val){ mock('mock value') }
12
+ let(:mock_val){ double('mock value') }
13
13
 
14
14
  let(:smurf_class) do
15
15
  class Gorillib::Test::Smurf
@@ -0,0 +1,57 @@
1
+ shared_context "graphs" do
2
+
3
+ let(:empty_graph) do
4
+ Hanuman::Graph.receive({})
5
+ end
6
+
7
+ let(:single_stage_graph) do
8
+ Hanuman::Graph.receive({stages: {first: Hanuman::Stage.receive(label: :first)}})
9
+ end
10
+
11
+ let(:graph) do
12
+ Hanuman::Graph.receive(stages: {
13
+ first_a: Hanuman::Stage.receive(label: :first_a), # ancestor
14
+ first_b: Hanuman::Stage.receive(label: :first_b), # ancestor,
15
+ second: Hanuman::Stage.receive(label: :second), # ancestor, descendent
16
+ third_a: Hanuman::Stage.receive(label: :third_a), # ancestor, descendent
17
+ third_b: Hanuman::Stage.receive(label: :third_b), # descendent
18
+ fourth: Hanuman::Stage.receive(label: :fourth), # descendent
19
+ },
20
+ links: [
21
+ Hanuman::LinkFactory.connect(:simple, :first_a, :second),
22
+ Hanuman::LinkFactory.connect(:simple, :first_b, :second),
23
+ Hanuman::LinkFactory.connect(:simple, :second, :third_a),
24
+ Hanuman::LinkFactory.connect(:simple, :second, :third_b),
25
+ Hanuman::LinkFactory.connect(:simple, :third_a, :fourth),
26
+ ]
27
+ )
28
+ end
29
+
30
+ let(:empty_tree) do
31
+ Hanuman::Tree.receive({})
32
+ end
33
+
34
+ let(:single_stage_tree) do
35
+ Hanuman::Tree.receive({stages: { first: Hanuman::Stage.receive(label: :first)}})
36
+ end
37
+
38
+ let(:tree) do
39
+ Hanuman::Tree.receive(stages: {
40
+ # important to mix up the order here so we
41
+ # can ensure that tree-sorting is working.
42
+ first: Hanuman::Stage.receive(label: :first), # ancestor,
43
+ third_b: Hanuman::Stage.receive(label: :third_b), # descendent
44
+ second: Hanuman::Stage.receive(label: :second), # ancestor, descendent
45
+ fourth: Hanuman::Stage.receive(label: :fourth), # descendent
46
+ third_a: Hanuman::Stage.receive(label: :third_a), # ancestor, descendent
47
+ },
48
+ links: [
49
+ Hanuman::LinkFactory.connect(:simple, :first, :second),
50
+ Hanuman::LinkFactory.connect(:simple, :second, :third_a),
51
+ Hanuman::LinkFactory.connect(:simple, :second, :third_b),
52
+ Hanuman::LinkFactory.connect(:simple, :third_a, :fourth),
53
+ ]
54
+ )
55
+ end
56
+
57
+ end
@@ -1,5 +1,12 @@
1
1
  shared_examples_for 'a Stage::Builder' do
2
- before(:each){ Hanuman::GlobalRegistry.clear! }
2
+ before(:each) do
3
+ @registry = Hanuman::GlobalRegistry.show
4
+ Hanuman::GlobalRegistry.clear!
5
+ end
6
+
7
+ after(:each) do
8
+ Hanuman::GlobalRegistry.merge! @registry
9
+ end
3
10
 
4
11
  context '.receive' do
5
12
  it 'extra arguments are stored in the :args attribute' do
@@ -84,18 +91,4 @@ shared_examples_for 'a Stage::Builder' do
84
91
  subject.serialize.should_not include(:args, :for_class)
85
92
  end
86
93
  end
87
-
88
- context '#into' do
89
- subject { described_class.receive(label: :pyro) }
90
- let(:other_stage){ described_class.receive(label: :iceman) }
91
-
92
- it 'returns the linked into stage for chaining' do
93
- subject.into(other_stage).should be(other_stage)
94
- end
95
-
96
- it 'links two stages together with a simple link' do
97
- subject.into(other_stage)
98
- subject.links.should be_any{ |link| link.from == :pyro and link.into == :iceman }
99
- end
100
- end
101
94
  end
@@ -1,2 +1,154 @@
1
1
  require 'spec_helper'
2
2
 
3
+ describe Wukong::DriverMethods do
4
+
5
+ describe "#construct_dataflow" do
6
+
7
+ context "given a label registered to a processor" do
8
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
9
+
10
+ context "constructs an anonymous dataflow" do
11
+ let(:dataflow) { driver.dataflow }
12
+ subject { dataflow }
13
+
14
+ it { should_not be_nil }
15
+ its(:label) { should be_nil }
16
+ its(:links) { should be_empty }
17
+
18
+ context "with a single stage" do
19
+ let(:stage) { dataflow.stages.values.first }
20
+ subject { stage }
21
+
22
+ it { should_not be_nil }
23
+ its(:label) { should == :regexp }
24
+ its(:match) { should == /hi/ }
25
+ end
26
+ end
27
+ end
28
+
29
+ context "given a serialization argument" do
30
+ context "that does not match a registered serializer" do
31
+ it "raises an error" do
32
+ expect { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, to: 'fake') }.to raise_error(Wukong::Error)
33
+ end
34
+ end
35
+ context "that matches a registered serializer" do
36
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, to: 'json') }
37
+ let(:dataflow) { driver.dataflow }
38
+ it "appends the serializer to each of the leaves" do
39
+ dataflow.leaves.size.should == 1
40
+ dataflow.leaves.map(&:label).should_not include(:regexp)
41
+ end
42
+ end
43
+ end
44
+
45
+ context "given a deserialization argument" do
46
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, from: 'json') }
47
+ let(:dataflow) { driver.dataflow }
48
+
49
+ context "adds a deserializer" do
50
+ subject { dataflow.stages[:from_json] }
51
+ it { should_not be_nil }
52
+ it "which is the root of the dataflow" do
53
+ dataflow.root.should == subject
54
+ end
55
+ it "which is linked to the original root" do
56
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
57
+ end
58
+ end
59
+ end
60
+
61
+ context "given a recordization argument" do
62
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, as: Time) }
63
+ let(:dataflow) { driver.dataflow }
64
+
65
+ context "adds a recordizer" do
66
+ subject { dataflow.stages[:recordize] }
67
+ it { should_not be_nil }
68
+ it "which is the root of the dataflow" do
69
+ dataflow.root.should == subject
70
+ end
71
+ it "which is linked to the original root" do
72
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
73
+ end
74
+ end
75
+ end
76
+
77
+ context "given deserialization and recordization arguments" do
78
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/, from: 'json', as: Time) }
79
+ let(:dataflow) { driver.dataflow }
80
+ let(:recordizer) { dataflow.stages[:recordize] }
81
+ let(:deserializer) { dataflow.stages[:from_json] }
82
+
83
+ context "adds a deserializer" do
84
+ subject { deserializer }
85
+ it { should_not be_nil }
86
+ it "which is the root of the dataflow" do
87
+ dataflow.root.should == subject
88
+ end
89
+ it "which is linked to the recordizer" do
90
+ dataflow.ancestors(dataflow.stages[:recordizer]).should include(subject)
91
+ end
92
+ end
93
+
94
+ context "adds a recordizer" do
95
+ subject { recordizer }
96
+ it { should_not be_nil }
97
+ it "which is linked to the original root" do
98
+ dataflow.ancestors(dataflow.stages[:regexp]).should include(subject)
99
+ end
100
+ end
101
+
102
+ end
103
+ end
104
+
105
+ describe "#setup_dataflow" do
106
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
107
+
108
+ it "calls the driver's #setup method" do
109
+ driver.should_receive(:setup)
110
+ driver.setup_dataflow
111
+ end
112
+
113
+ it "calls setup on each stage of the dataflow" do
114
+ driver.dataflow.each_stage do |stage|
115
+ stage.should_receive(:setup)
116
+ end
117
+ driver.setup_dataflow
118
+ end
119
+ end
120
+
121
+ describe "#finalize_dataflow" do
122
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
123
+ it "calls the driver's #finalize method" do
124
+ driver.should_receive(:finalize)
125
+ driver.finalize_dataflow
126
+ end
127
+
128
+ it "calls finalize on each stage of the dataflow" do
129
+ driver.dataflow.each_stage do |stage|
130
+ stage.should_receive(:finalize)
131
+ end
132
+ driver.finalize_dataflow
133
+ end
134
+ end
135
+
136
+ describe "#finalize_and_stop_dataflow" do
137
+ let(:driver) { Wukong::SpecHelpers::UnitTestDriver.new(:regexp, match: /hi/) }
138
+ it "calls the driver's #finalize and #stop methods" do
139
+ driver.should_receive(:finalize)
140
+ driver.should_receive(:stop)
141
+ driver.finalize_and_stop_dataflow
142
+ end
143
+
144
+ it "calls finalize and stop on each stage of the dataflow" do
145
+ driver.dataflow.each_stage do |stage|
146
+ stage.should_receive(:finalize)
147
+ stage.should_receive(:stop)
148
+ end
149
+ driver.finalize_and_stop_dataflow
150
+ end
151
+
152
+ end
153
+
154
+ end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Wukong::Local::LocalRunner do
4
- before { EM.stub!(:run) }
4
+ before { EM.stub(:run) }
5
5
 
6
6
  describe "choosing a processor name" do
7
7
 
@@ -25,16 +25,5 @@ describe Wukong::Local::LocalRunner do
25
25
  local_runner(examples_dir('string_reverser.rb')).processor.should == 'string_reverser'
26
26
  end
27
27
  end
28
-
29
- describe "uses a" do
30
- it "StdioDriver by default" do
31
- local_runner('identity').driver.should == Wukong::Local::StdioDriver
32
- end
33
-
34
- it "TCPDriver when given a --port argument" do
35
- local_runner('identity','--tcp_port=6000').driver.should == Wukong::Local::TCPDriver
36
- end
37
-
38
- end
39
28
 
40
29
  end
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::Local::StdioDriver do
4
+ before { EM.stub(:stop) }
5
+
6
+ let(:driver) { Wukong::Local::StdioDriver.new(:bogus_event_machine_inserted_arg, :identity, {}) }
7
+
8
+ it "attaches itself as an EventMachine handler for $stdin, providing the given label and settings" do
9
+ EM.should_receive(:attach).with($stdin, Wukong::Local::StdioDriver, :foo, {})
10
+ Wukong::Local::StdioDriver.start(:foo, {})
11
+ end
12
+
13
+ describe "setting up a dataflow" do
14
+ context "#post_init hook from EventMachine" do
15
+ after { driver.post_init }
16
+
17
+ it "sets signal traps for INT and TERM" do
18
+ Signal.should_receive(:trap).with('INT').at_least(:once)
19
+ Signal.should_receive(:trap).with('TERM').at_least(:once)
20
+ end
21
+
22
+ it "calls the #setup_dataflow method" do
23
+ driver.should_receive(:setup_dataflow).at_least(:once)
24
+ end
25
+
26
+ it "syncs $stdout" do
27
+ $stdout.should_receive(:sync).at_least(:once)
28
+ end
29
+
30
+ end
31
+ end
32
+
33
+ describe "driving a dataflow" do
34
+ context "#receive_line hook from EventMachine" do
35
+ let(:line) { "hello" }
36
+
37
+ after { driver.receive_line(line) }
38
+ it "passes the line to the #send_through_dataflow method" do
39
+ driver.should_receive(:send_through_dataflow).with(line)
40
+ end
41
+
42
+ context "upon an error within the dataflow" do
43
+ let(:message) { "whoops" }
44
+
45
+ before do
46
+ driver.should_receive(:send_through_dataflow).with(line) do
47
+ raise Wukong::Error.new(message)
48
+ end
49
+ driver.log.stub(:error)
50
+ end
51
+
52
+ it "logs an error message" do
53
+ driver.log.should_receive(:error).with(kind_of(String))
54
+ end
55
+ end
56
+ end
57
+ end
58
+
59
+ describe "shutting down a dataflow" do
60
+ context "#unbind hook from EventMachine" do
61
+ after { driver.unbind }
62
+
63
+ it "calls the #finalize_and_stop_dataflow method" do
64
+ driver.should_receive(:finalize_and_stop_dataflow)
65
+ end
66
+
67
+ it "stops the EventMachine reactor" do
68
+ EM.should_receive(:stop)
69
+ end
70
+
71
+ end
72
+ end
73
+ end
@@ -9,7 +9,6 @@ describe Wukong::Processor do
9
9
  it{ should respond_to(:process) }
10
10
  it{ should respond_to(:finalize) }
11
11
  it{ should respond_to(:stop) }
12
- it{ should respond_to(:notify) }
13
12
  end
14
13
 
15
14
  describe "default process method" do
@@ -90,7 +90,7 @@ describe Wukong::Runner do
90
90
  let(:deploy_pack_dir) { examples_dir('ruby_project') }
91
91
  before do
92
92
  FileUtils.cd(dir)
93
- ENV.stub!(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
93
+ ENV.stub(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
94
94
  end
95
95
  subject { generic_runner }
96
96
  its(:deploy_pack_dir) { should == '/' }
@@ -103,7 +103,7 @@ describe Wukong::Runner do
103
103
  let(:deploy_pack_dir) { examples_dir('deploy_pack') }
104
104
  before do
105
105
  FileUtils.cd(dir)
106
- ENV.stub!(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
106
+ ENV.stub(:[]).with("BUNDLE_GEMFILE").and_return(File.join(deploy_pack_dir, 'Gemfile'))
107
107
  end
108
108
  subject { generic_runner }
109
109
  its(:deploy_pack_dir) { should == deploy_pack_dir.to_s }
@@ -0,0 +1,6 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::Source do
4
+ it_behaves_like 'a plugin'
5
+ end
6
+
@@ -0,0 +1,101 @@
1
+ require 'spec_helper'
2
+
3
+ describe :extract do
4
+
5
+ let(:hsh) { { "hi" => "there", "top" => { "lower" => { "lowest" => "value" } } } }
6
+ let(:ary) { ['1', 2, 'three'] }
7
+
8
+ subject { processor(:extract) }
9
+
10
+ it_behaves_like 'a processor', :named => :extract
11
+
12
+ context "on a string" do
13
+ it "emits the string with no arguments" do
14
+ processor(:extract).given('hi there', 'buddy').should emit('hi there', 'buddy')
15
+ end
16
+ end
17
+ context "on a Fixnum" do
18
+ it "emits the number with no arguments" do
19
+ processor(:extract).given(3, 3.0).should emit(3, 3.0)
20
+ end
21
+ end
22
+ context "on a Hash" do
23
+ it "emits the hash with no arguments" do
24
+ processor(:extract).given(hsh).should emit(hsh)
25
+ end
26
+ it "can extract a key" do
27
+ processor(:extract, part: 'hi').given(hsh).should emit('there')
28
+ end
29
+ it "emits nil when the value of the key is nil" do
30
+ processor(:extract, part: 'bye').given(hsh).should emit(nil)
31
+ end
32
+ it "can extract a nested key" do
33
+ processor(:extract, part: 'top.lower.lowest').given(hsh).should emit('value')
34
+ end
35
+ it "emits nil when the value of this nested key is nil" do
36
+ processor(:extract, part: 'foo.bar.baz').given(hsh).should emit(nil)
37
+ end
38
+ end
39
+ context "on an Array" do
40
+ it "emits the array with no arguments" do
41
+ processor(:extract).given(ary).should emit(ary)
42
+ end
43
+ it "can extract the nth value with an integer argument" do
44
+ processor(:extract, part: 2).given(ary).should emit(2)
45
+ end
46
+ it "can extract the nth value with a string argument" do
47
+ processor(:extract, part: '2').given(ary).should emit(2)
48
+ end
49
+ end
50
+ context "on JSON" do
51
+ let(:garbage) { '{"239823:' }
52
+ it "emits the JSON with no arguments" do
53
+ processor(:extract).given_json(hsh).should emit_json(hsh)
54
+ end
55
+ it "will skip badly formed records" do
56
+ processor(:extract).given(garbage).should emit(garbage)
57
+ end
58
+ it "can extract a key" do
59
+ processor(:extract, part: 'hi').given_json(hsh).should emit('there')
60
+ end
61
+ it "can extract a nested key" do
62
+ processor(:extract, part: 'top.lower.lowest').given_json(hsh).should emit('value')
63
+ end
64
+ it "emits nil when the record is missing the key" do
65
+ processor(:extract, part: 'foo.bar.baz').given_json(hsh).should emit(nil)
66
+ end
67
+ end
68
+ context "on delimited data" do
69
+ it "emits the row with no arguments" do
70
+ processor(:extract).given_delimited('|', ary).should emit(ary.map(&:to_s).join('|'))
71
+ end
72
+ it "can extract the nth value with an integer argument" do
73
+ processor(:extract, part: 2, separator: '|').given_delimited('|', ary).should emit('2')
74
+ end
75
+ it "can extract nth value with a string argument" do
76
+ processor(:extract, part: '2', separator: '|').given_delimited('|', ary).should emit('2')
77
+ end
78
+ end
79
+ context "on TSV" do
80
+ it "emits the TSV with no arguments" do
81
+ processor(:extract).given_tsv(ary).should emit(ary.map(&:to_s).join("\t"))
82
+ end
83
+ it "can extract the nth value with an integer argument" do
84
+ processor(:extract, part: 2).given_tsv(ary).should emit('2')
85
+ end
86
+ it "can extract the nth value with a string argument" do
87
+ processor(:extract, part: '2').given_tsv(ary).should emit('2')
88
+ end
89
+ end
90
+ context "on CSV" do
91
+ it "emits the CSV with no arguments" do
92
+ processor(:extract).given_csv(ary).should emit(ary.map(&:to_s).join(","))
93
+ end
94
+ it "can extract the nth value with an integer argument" do
95
+ processor(:extract, part: 2, separator: ',').given_csv(ary).should emit('2')
96
+ end
97
+ it "can extract the nth value with a string argument" do
98
+ processor(:extract, part: '2', separator: ',').given_csv(ary).should emit('2')
99
+ end
100
+ end
101
+ end