wukong 3.0.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/.gitignore +1 -0
  2. data/Gemfile +1 -1
  3. data/README.md +253 -45
  4. data/bin/wu +34 -0
  5. data/bin/wu-source +5 -0
  6. data/examples/Gemfile +0 -1
  7. data/examples/deploy_pack/Gemfile +0 -1
  8. data/examples/improver/tweet_summary.rb +73 -0
  9. data/examples/ruby_project/Gemfile +0 -1
  10. data/examples/splitter.rb +94 -0
  11. data/examples/twitter.rb +5 -0
  12. data/lib/hanuman.rb +1 -1
  13. data/lib/hanuman/graph.rb +39 -22
  14. data/lib/hanuman/stage.rb +46 -13
  15. data/lib/hanuman/tree.rb +67 -0
  16. data/lib/wukong.rb +6 -1
  17. data/lib/wukong/dataflow.rb +19 -48
  18. data/lib/wukong/driver.rb +176 -65
  19. data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
  20. data/lib/wukong/driver/wiring.rb +68 -0
  21. data/lib/wukong/local.rb +6 -4
  22. data/lib/wukong/local/runner.rb +14 -16
  23. data/lib/wukong/local/stdio_driver.rb +72 -12
  24. data/lib/wukong/processor.rb +1 -30
  25. data/lib/wukong/runner.rb +2 -0
  26. data/lib/wukong/runner/command_runner.rb +44 -0
  27. data/lib/wukong/source.rb +33 -0
  28. data/lib/wukong/source/source_driver.rb +74 -0
  29. data/lib/wukong/source/source_runner.rb +38 -0
  30. data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
  31. data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
  32. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
  33. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
  34. data/lib/wukong/version.rb +1 -1
  35. data/lib/wukong/widget/echo.rb +55 -0
  36. data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
  37. data/lib/wukong/widget/filters.rb +15 -0
  38. data/lib/wukong/widget/logger.rb +56 -0
  39. data/lib/wukong/widget/operators.rb +82 -0
  40. data/lib/wukong/widget/reducers.rb +2 -0
  41. data/lib/wukong/widget/reducers/improver.rb +71 -0
  42. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  43. data/lib/wukong/widget/serializers.rb +21 -6
  44. data/lib/wukong/widgets.rb +6 -3
  45. data/spec/hanuman/graph_spec.rb +73 -10
  46. data/spec/hanuman/stage_spec.rb +15 -0
  47. data/spec/hanuman/tree_spec.rb +119 -0
  48. data/spec/spec_helper.rb +13 -1
  49. data/spec/support/example_test_helpers.rb +0 -1
  50. data/spec/support/model_test_helpers.rb +1 -1
  51. data/spec/support/shared_context_for_graphs.rb +57 -0
  52. data/spec/support/shared_examples_for_builders.rb +8 -15
  53. data/spec/wukong/driver_spec.rb +152 -0
  54. data/spec/wukong/local/runner_spec.rb +1 -12
  55. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  56. data/spec/wukong/processor_spec.rb +0 -1
  57. data/spec/wukong/runner_spec.rb +2 -2
  58. data/spec/wukong/source_spec.rb +6 -0
  59. data/spec/wukong/widget/extract_spec.rb +101 -0
  60. data/spec/wukong/widget/logger_spec.rb +23 -0
  61. data/spec/wukong/widget/operators_spec.rb +25 -0
  62. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  63. data/spec/wukong/wu-source_spec.rb +32 -0
  64. data/spec/wukong/wu_spec.rb +14 -0
  65. data/wukong.gemspec +1 -2
  66. metadata +45 -28
  67. data/lib/wukong/local/tcp_driver.rb +0 -47
  68. data/spec/wu/geo/geolocated_spec.rb +0 -247
  69. data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -1,4 +1,5 @@
1
1
  require_relative("reducers/accumulator")
2
+ require_relative("reducers/improver")
2
3
  require_relative("reducers/sort")
3
4
  require_relative("reducers/count")
4
5
  require_relative("reducers/group")
@@ -6,3 +7,4 @@ require_relative("reducers/group_concat")
6
7
  require_relative("reducers/moments")
7
8
  require_relative("reducers/bin")
8
9
  require_relative("reducers/uniq")
10
+ require_relative("reducers/join_xml")
@@ -0,0 +1,71 @@
1
+ module Wukong
2
+ class Processor
3
+
4
+ # A base widget for building more complex improver widgets.
5
+ class Improver < Processor
6
+
7
+ # The current group of records.
8
+ attr_accessor :group
9
+
10
+ # Sets up this improver by defining an initial key (with a
11
+ # value that is unlikely to be found in real data) and calling
12
+ # `#zero` with no record.
13
+ def setup
14
+ @key = :__first_group__
15
+ zero
16
+ end
17
+
18
+ def recordize record
19
+ record.split("\t")
20
+ end
21
+
22
+ #
23
+ # All kinds of assumptions here,
24
+ # record is tab-delimited and the
25
+ # first field is a name of a function
26
+ # to call
27
+ #
28
+ def get_function record
29
+ record.first
30
+ end
31
+
32
+ # Processes the `record`.
33
+ def process(record)
34
+ fields = recordize(record)
35
+ func = get_function(fields)
36
+ case func
37
+ when 'zero' then
38
+ yield zero
39
+ when 'accumulate' then
40
+ accumulate(fields[1..-1])
41
+ when 'improve' then
42
+ yield improve(fields[1], self.group)
43
+ self.group = []
44
+ else
45
+ raise NoMethodError, "undefined method #{func} for Improver"
46
+ end
47
+ STDOUT.flush # WHY? Because.
48
+ end
49
+
50
+ # Starts accumulation for a new key. Return what you would
51
+ # with no improvements.
52
+ def zero
53
+ self.group = []
54
+ end
55
+
56
+ # Accumulates another +record+.
57
+ #
58
+ # @param [Object] record
59
+ def accumulate record
60
+ self.group << record
61
+ end
62
+
63
+ # Improve prev with group
64
+ #
65
+ #
66
+ def improve prev, group
67
+ end
68
+
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,37 @@
1
+ module Wukong
2
+ class Processor
3
+
4
+ # Joins XML input data based on a root tag.
5
+ class JoinXML < Processor
6
+
7
+ field :root, String, default: 'xml', doc: "Name of the root XML element"
8
+
9
+ def setup
10
+ @lines = []
11
+ end
12
+
13
+ def process line
14
+ if match = terminator.match(line)
15
+ if match.end(0) == line.size
16
+ @lines << line
17
+ else
18
+ @lines << line[0...match.end(0)]
19
+ end
20
+ yield @lines.join("\n")
21
+ @lines = []
22
+ @lines << line[match.end(0)..-1] unless match.end(0) == line.size
23
+ else
24
+ @lines << line
25
+ end
26
+ end
27
+
28
+ def terminator
29
+ %r{<\s*/\s*#{root}\s*>}i
30
+ end
31
+
32
+ register :join_xml
33
+ end
34
+ end
35
+ end
36
+
37
+
@@ -4,13 +4,11 @@ module Wukong
4
4
  SerializerError = Class.new(Error)
5
5
 
6
6
  class Serializer < Processor
7
- field :on_error, String, default: 'log', :doc => "Action to take upon an error, either 'log' or 'notify'"
8
7
 
9
8
  def handle_error(record, err)
10
- case on_error
11
- when 'log' then log.warn "#{err.class}: #{err.message}"
12
- when 'notify' then notify('error', record: record, error: err)
13
- end
9
+ return if err.class == Errno::EPIPE
10
+ log.error "#{err.class}: #{err.message}"
11
+ err.backtrace.each { |line| log.debug(line) }
14
12
  end
15
13
 
16
14
  end
@@ -292,10 +290,27 @@ EOF
292
290
  def process(record)
293
291
  wire_format = record.try(:to_wire) || record
294
292
  raise SerializerError.new("Can only recordize a Hash-like record") unless wire_format.is_a?(Hash)
295
- yield model.receive(wire_format)
293
+ klass = model_class_for(wire_format)
294
+ if klass
295
+ yield klass.receive(wire_format)
296
+ else
297
+ log.error("No default model class and no explicit model for: #{wire_format.inspect}")
298
+ end
296
299
  rescue => e
297
300
  handle_error(record, e)
298
301
  end
302
+
303
+ def model_class_for(record)
304
+ if explicit_type = (record[:_type] || record["_type"])
305
+ begin
306
+ return explicit_type.constantize
307
+ rescue NameError => e
308
+ log.warn("Could not find a class for <#{explicit_type}>")
309
+ end
310
+ end
311
+ return model if model
312
+ end
313
+
299
314
  register
300
315
  end
301
316
  end
@@ -1,4 +1,7 @@
1
- require 'wukong/widget/processors'
2
- require 'wukong/widget/reducers'
3
- require 'wukong/widget/serializers'
4
1
  require 'wukong/widget/filters'
2
+ require 'wukong/widget/serializers'
3
+ require 'wukong/widget/operators'
4
+ require 'wukong/widget/reducers'
5
+ require 'wukong/widget/extract'
6
+ require 'wukong/widget/logger'
7
+ require 'wukong/widget/echo'
@@ -2,17 +2,80 @@ require 'spec_helper'
2
2
 
3
3
  describe Hanuman::Graph, :hanuman => true do
4
4
 
5
- # it 'makes a tree' do
6
- # example_graph.tree.should == {
7
- # :name => :pie,
8
- # :inputs => [:bake_pie],
9
- # :stages => [
10
- # {:name=>:make_pie, :inputs=>[:crust, :filling]},
11
- # {:name=>:bake_pie, :inputs=>[:make_pie]}
12
- # ],
13
- # }
14
- # end
5
+ include_context "graphs"
6
+
7
+ context "#ancestors" do
8
+ context "called without any arguments" do
9
+ it "returns all stages with an ancestor" do
10
+ graph.ancestors.size.should == 4
11
+ end
12
+ end
13
+
14
+ context "called with a stage as the first argument" do
15
+ it "returns the immediate ancestors of the stage" do
16
+ graph.ancestors(graph.stages[:second]).size.should == 2
17
+ end
18
+ end
19
+ end
15
20
 
21
+ context "#descendents" do
22
+ context "called without any arguments" do
23
+ it "returns all stages with a descendent" do
24
+ graph.descendents.size.should == 4
25
+ end
26
+ end
27
+
28
+ context "called with a stage as the first argument" do
29
+ it "returns the immediate descendents of the stage" do
30
+ graph.descendents(graph.stages[:second]).size.should == 2
31
+ end
32
+ end
33
+ end
34
+
35
+ context "#add_stage" do
36
+ let(:stage) { Hanuman::Stage.receive(label: :orphan) }
37
+ it "adds the stage to the graph" do
38
+ expect { graph.add_stage(stage) }.to change { graph.stages[:orphan] }.from(nil).to(stage)
39
+ end
40
+ it "doesn't create any links" do
41
+ expect { graph.add_stage(stage) }.to_not change { graph.links }
42
+ end
43
+ end
44
+
45
+ context "#add_link" do
46
+ context "when adding a new stage" do
47
+ let(:from) { graph.stages[:fourth] }
48
+ let(:into) { Hanuman::Stage.receive(label: :fifth) }
49
+ it "adds the new stage to the graph" do
50
+ expect { graph.add_link(:simple, from, into) }.to change { graph.stages[:fifth] }.from(nil).to(into)
51
+ end
52
+ it "adds the new link to the graph" do
53
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
54
+ end
55
+ end
56
+ context "when adding an existing link" do
57
+ let(:from) { graph.stages[:third_a] }
58
+ let(:into) { graph.stages[:fourth] }
59
+ it "duplicates the link in the graph" do
60
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
61
+ end
62
+ end
63
+ context "when making a cycle" do
64
+ let(:from) { graph.stages[:fourth] }
65
+ let(:into) { graph.stages[:first_a] }
66
+ it "adds the link in the graph" do
67
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
68
+ end
69
+ end
70
+ context "when linking a stage to itself" do
71
+ let(:from) { tree.stages[:fourth] }
72
+ let(:into) { tree.stages[:fourth] }
73
+ it "adds the link in the graph" do
74
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
75
+ end
76
+ end
77
+ end
78
+
16
79
  end
17
80
 
18
81
  describe Hanuman::GraphBuilder, :hanuman => true do
@@ -63,4 +63,19 @@ describe Hanuman::StageBuilder, :hanuman => true do
63
63
  subject.serialize.should_not include(:links)
64
64
  end
65
65
  end
66
+
67
+ context '#into' do
68
+ subject { described_class.receive(label: :pyro) }
69
+ let(:other_stage){ described_class.receive(label: :iceman) }
70
+
71
+ it 'returns the linked into stage for chaining' do
72
+ subject.into(other_stage).should be(other_stage)
73
+ end
74
+
75
+ it 'links two stages together with a simple link' do
76
+ subject.into(other_stage)
77
+ subject.links.should be_any{ |link| link.from == :pyro and link.into == :iceman }
78
+ end
79
+ end
80
+
66
81
  end
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hanuman::Tree do
4
+
5
+ include_context "graphs"
6
+
7
+ it "iterates over each stage in tree-order" do
8
+ tree.directed_sort.should == [:first, :second, :third_b, :third_a, :fourth]
9
+ end
10
+
11
+ context "#root" do
12
+ context "when called without arguments" do
13
+ it "returns the root of the whole tree" do
14
+ tree.root.should_not be_nil
15
+ tree.root.label.should == :first
16
+ end
17
+ it "returns the root of a sigle stage tree" do
18
+ single_stage_tree.root.should_not be_nil
19
+ single_stage_tree.root.label.should == :first
20
+ end
21
+
22
+ end
23
+ context "when called with a stage" do
24
+ it "should return the root of the whole tree" do
25
+ tree.root(tree.stages[:fourth]).should_not be_nil
26
+ tree.root(tree.stages[:fourth]).label.should == :first
27
+ end
28
+ end
29
+ end
30
+
31
+ context "#ancestor" do
32
+ it "returns the ancestor of a stage" do
33
+ tree.ancestor(tree.stages[:fourth]).should_not be_nil
34
+ tree.ancestor(tree.stages[:fourth]).label.should == :third_a
35
+ end
36
+ it "returns nil for the root" do
37
+ tree.ancestor(tree.root).should be_nil
38
+ end
39
+ end
40
+
41
+ context "#leaves" do
42
+ it "returns the leaf stages of a tree" do
43
+ tree.leaves.map(&:label).should include(:fourth, :third_b)
44
+ end
45
+
46
+ it "returns the root of a tree that has no other leaves" do
47
+ single_stage_tree.leaves.map(&:label).should include(:first)
48
+ end
49
+
50
+ end
51
+
52
+ context "#prepend" do
53
+ let(:zeroth) { Hanuman::Stage.receive(label: :zeroth) }
54
+ it "adds the given stage" do
55
+ expect { tree.prepend(zeroth) }.to change { tree.stages[:zeroth] }.from(nil).to(zeroth)
56
+ end
57
+ it "adds a link from the new stage to the old root" do
58
+ expect { tree.prepend(zeroth) }.to change { tree.has_link?(zeroth, tree.stages[:first]) }.from(false).to(true)
59
+ end
60
+ it "the root becomes the new stage" do
61
+ expect { tree.prepend(zeroth) }.to change { tree.root.label }.from(:first).to(:zeroth)
62
+ end
63
+ end
64
+
65
+ context "#append" do
66
+ let(:fifth) { Hanuman::Stage.receive(label: :fifth) }
67
+ it "adds a new stage for each leaf" do
68
+ expect { tree.append(fifth) }.to change { tree.stages.size }.by(2)
69
+ end
70
+ it "adds a link for each of the new stages" do
71
+ expect { tree.append(fifth) }.to change { tree.links.size }.by(2)
72
+ end
73
+ it "but doesn't change the number of leaves " do
74
+ expect { tree.append(fifth) }.to_not change { tree.leaves.size }
75
+ end
76
+ end
77
+
78
+ context "#add_link" do
79
+ context "when adding a new stage" do
80
+ let(:from) { tree.stages[:fourth] }
81
+ let(:into) { Hanuman::Stage.receive(label: :fifth) }
82
+ it "adds the new stage to the tree" do
83
+ expect { tree.add_link(:simple, from, into) }.to change { tree.stages[:fifth] }.from(nil).to(into)
84
+ end
85
+ it "adds the new link to the tree" do
86
+ expect { tree.add_link(:simple, from, into) }.to change { tree.links.size }.by(1)
87
+ end
88
+ end
89
+ context "when adding an existing link" do
90
+ let(:from) { tree.stages[:third_a] }
91
+ let(:into) { tree.stages[:fourth] }
92
+ it "doesn't duplicate the link in the tree" do
93
+ expect { tree.add_link(:simple, from, into) }.to_not change { tree.links.size }
94
+ end
95
+ end
96
+ context "when adding a link to a stage with an existing parent" do
97
+ let(:from) { Hanuman::Stage.receive(label: :fifth) }
98
+ let(:into) { tree.stages[:fourth] }
99
+ it "raises an error" do
100
+ expect { tree.add_link(:simple, from, into) }.to raise_error(Hanuman::Tree::MultipleRoots)
101
+ end
102
+ end
103
+ context "when making a cycle" do
104
+ let(:from) { tree.stages[:fourth] }
105
+ let(:into) { tree.stages[:first] }
106
+ it "raises an error" do
107
+ expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
108
+ end
109
+ end
110
+ context "when linking a stage to itself" do
111
+ let(:from) { tree.stages[:fourth] }
112
+ let(:into) { tree.stages[:fourth] }
113
+ it "raises an error" do
114
+ expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
115
+ end
116
+ end
117
+ end
118
+
119
+ end
@@ -1,5 +1,7 @@
1
1
  require 'wukong'
2
2
  require 'wukong/spec_helpers'
3
+ require 'wukong/source'
4
+ require_relative './support/shared_context_for_graphs'
3
5
  require_relative './support/shared_examples_for_shortcuts'
4
6
  require_relative './support/shared_examples_for_builders'
5
7
  require_relative './support/shared_context_for_reducers'
@@ -18,13 +20,23 @@ RSpec.configure do |config|
18
20
  end
19
21
 
20
22
  def generic_runner *args
21
- runner(Wukong::Runner, 'wu-generic', *args)
23
+ # the wu-generic program doesn't have to exist for this Runner to
24
+ # work if it's called from Ruby code
25
+ runner(Wukong::Runner, 'wu-generic', *args)
22
26
  end
23
27
 
24
28
  def wu_local *args
25
29
  command('wu-local', *args)
26
30
  end
27
31
 
32
+ def wu_source *args
33
+ command('wu-source', *args)
34
+ end
35
+
36
+ def wu *args
37
+ command('wu', *args)
38
+ end
39
+
28
40
  # FIXME Why is this here?
29
41
  config.treat_symbols_as_metadata_keys_with_true_values = true
30
42
 
@@ -28,7 +28,6 @@ RSpec.configure do |config|
28
28
  end
29
29
 
30
30
  shared_context 'wukong', :helpers => true do
31
-
32
31
  RSpec::Matchers.define(:be_in){|expected| match{|actual| expected.include?(actual) } }
33
32
 
34
33
  def self.be_ish_matcher(handle, regexp)