wukong 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. data/.gitignore +1 -0
  2. data/Gemfile +1 -1
  3. data/README.md +253 -45
  4. data/bin/wu +34 -0
  5. data/bin/wu-source +5 -0
  6. data/examples/Gemfile +0 -1
  7. data/examples/deploy_pack/Gemfile +0 -1
  8. data/examples/improver/tweet_summary.rb +73 -0
  9. data/examples/ruby_project/Gemfile +0 -1
  10. data/examples/splitter.rb +94 -0
  11. data/examples/twitter.rb +5 -0
  12. data/lib/hanuman.rb +1 -1
  13. data/lib/hanuman/graph.rb +39 -22
  14. data/lib/hanuman/stage.rb +46 -13
  15. data/lib/hanuman/tree.rb +67 -0
  16. data/lib/wukong.rb +6 -1
  17. data/lib/wukong/dataflow.rb +19 -48
  18. data/lib/wukong/driver.rb +176 -65
  19. data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
  20. data/lib/wukong/driver/wiring.rb +68 -0
  21. data/lib/wukong/local.rb +6 -4
  22. data/lib/wukong/local/runner.rb +14 -16
  23. data/lib/wukong/local/stdio_driver.rb +72 -12
  24. data/lib/wukong/processor.rb +1 -30
  25. data/lib/wukong/runner.rb +2 -0
  26. data/lib/wukong/runner/command_runner.rb +44 -0
  27. data/lib/wukong/source.rb +33 -0
  28. data/lib/wukong/source/source_driver.rb +74 -0
  29. data/lib/wukong/source/source_runner.rb +38 -0
  30. data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
  31. data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
  32. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
  33. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
  34. data/lib/wukong/version.rb +1 -1
  35. data/lib/wukong/widget/echo.rb +55 -0
  36. data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
  37. data/lib/wukong/widget/filters.rb +15 -0
  38. data/lib/wukong/widget/logger.rb +56 -0
  39. data/lib/wukong/widget/operators.rb +82 -0
  40. data/lib/wukong/widget/reducers.rb +2 -0
  41. data/lib/wukong/widget/reducers/improver.rb +71 -0
  42. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  43. data/lib/wukong/widget/serializers.rb +21 -6
  44. data/lib/wukong/widgets.rb +6 -3
  45. data/spec/hanuman/graph_spec.rb +73 -10
  46. data/spec/hanuman/stage_spec.rb +15 -0
  47. data/spec/hanuman/tree_spec.rb +119 -0
  48. data/spec/spec_helper.rb +13 -1
  49. data/spec/support/example_test_helpers.rb +0 -1
  50. data/spec/support/model_test_helpers.rb +1 -1
  51. data/spec/support/shared_context_for_graphs.rb +57 -0
  52. data/spec/support/shared_examples_for_builders.rb +8 -15
  53. data/spec/wukong/driver_spec.rb +152 -0
  54. data/spec/wukong/local/runner_spec.rb +1 -12
  55. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  56. data/spec/wukong/processor_spec.rb +0 -1
  57. data/spec/wukong/runner_spec.rb +2 -2
  58. data/spec/wukong/source_spec.rb +6 -0
  59. data/spec/wukong/widget/extract_spec.rb +101 -0
  60. data/spec/wukong/widget/logger_spec.rb +23 -0
  61. data/spec/wukong/widget/operators_spec.rb +25 -0
  62. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  63. data/spec/wukong/wu-source_spec.rb +32 -0
  64. data/spec/wukong/wu_spec.rb +14 -0
  65. data/wukong.gemspec +1 -2
  66. metadata +45 -28
  67. data/lib/wukong/local/tcp_driver.rb +0 -47
  68. data/spec/wu/geo/geolocated_spec.rb +0 -247
  69. data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -1,4 +1,5 @@
1
1
  require_relative("reducers/accumulator")
2
+ require_relative("reducers/improver")
2
3
  require_relative("reducers/sort")
3
4
  require_relative("reducers/count")
4
5
  require_relative("reducers/group")
@@ -6,3 +7,4 @@ require_relative("reducers/group_concat")
6
7
  require_relative("reducers/moments")
7
8
  require_relative("reducers/bin")
8
9
  require_relative("reducers/uniq")
10
+ require_relative("reducers/join_xml")
@@ -0,0 +1,71 @@
1
+ module Wukong
2
+ class Processor
3
+
4
+ # A base widget for building more complex improver widgets.
5
+ class Improver < Processor
6
+
7
+ # The current group of records.
8
+ attr_accessor :group
9
+
10
+ # Sets up this improver by defining an initial key (with a
11
+ # value that is unlikely to be found in real data) and calling
12
+ # `#zero` with no record.
13
+ def setup
14
+ @key = :__first_group__
15
+ zero
16
+ end
17
+
18
+ def recordize record
19
+ record.split("\t")
20
+ end
21
+
22
+ #
23
+ # All kinds of assumptions here,
24
+ # record is tab-delimited and the
25
+ # first field is a name of a function
26
+ # to call
27
+ #
28
+ def get_function record
29
+ record.first
30
+ end
31
+
32
+ # Processes the `record`.
33
+ def process(record)
34
+ fields = recordize(record)
35
+ func = get_function(fields)
36
+ case func
37
+ when 'zero' then
38
+ yield zero
39
+ when 'accumulate' then
40
+ accumulate(fields[1..-1])
41
+ when 'improve' then
42
+ yield improve(fields[1], self.group)
43
+ self.group = []
44
+ else
45
+ raise NoMethodError, "undefined method #{func} for Improver"
46
+ end
47
+ STDOUT.flush # WHY? Because.
48
+ end
49
+
50
+ # Starts accumulation for a new key. Return what you would
51
+ # with no improvements.
52
+ def zero
53
+ self.group = []
54
+ end
55
+
56
+ # Accumulates another +record+.
57
+ #
58
+ # @param [Object] record
59
+ def accumulate record
60
+ self.group << record
61
+ end
62
+
63
+ # Improve prev with group
64
+ #
65
+ #
66
+ def improve prev, group
67
+ end
68
+
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,37 @@
1
+ module Wukong
2
+ class Processor
3
+
4
+ # Joins XML input data based on a root tag.
5
+ class JoinXML < Processor
6
+
7
+ field :root, String, default: 'xml', doc: "Name of the root XML element"
8
+
9
+ def setup
10
+ @lines = []
11
+ end
12
+
13
+ def process line
14
+ if match = terminator.match(line)
15
+ if match.end(0) == line.size
16
+ @lines << line
17
+ else
18
+ @lines << line[0...match.end(0)]
19
+ end
20
+ yield @lines.join("\n")
21
+ @lines = []
22
+ @lines << line[match.end(0)..-1] unless match.end(0) == line.size
23
+ else
24
+ @lines << line
25
+ end
26
+ end
27
+
28
+ def terminator
29
+ %r{<\s*/\s*#{root}\s*>}i
30
+ end
31
+
32
+ register :join_xml
33
+ end
34
+ end
35
+ end
36
+
37
+
@@ -4,13 +4,11 @@ module Wukong
4
4
  SerializerError = Class.new(Error)
5
5
 
6
6
  class Serializer < Processor
7
- field :on_error, String, default: 'log', :doc => "Action to take upon an error, either 'log' or 'notify'"
8
7
 
9
8
  def handle_error(record, err)
10
- case on_error
11
- when 'log' then log.warn "#{err.class}: #{err.message}"
12
- when 'notify' then notify('error', record: record, error: err)
13
- end
9
+ return if err.class == Errno::EPIPE
10
+ log.error "#{err.class}: #{err.message}"
11
+ err.backtrace.each { |line| log.debug(line) }
14
12
  end
15
13
 
16
14
  end
@@ -292,10 +290,27 @@ EOF
292
290
  def process(record)
293
291
  wire_format = record.try(:to_wire) || record
294
292
  raise SerializerError.new("Can only recordize a Hash-like record") unless wire_format.is_a?(Hash)
295
- yield model.receive(wire_format)
293
+ klass = model_class_for(wire_format)
294
+ if klass
295
+ yield klass.receive(wire_format)
296
+ else
297
+ log.error("No default model class and no explicit model for: #{wire_format.inspect}")
298
+ end
296
299
  rescue => e
297
300
  handle_error(record, e)
298
301
  end
302
+
303
+ def model_class_for(record)
304
+ if explicit_type = (record[:_type] || record["_type"])
305
+ begin
306
+ return explicit_type.constantize
307
+ rescue NameError => e
308
+ log.warn("Could not find a class for <#{explicit_type}>")
309
+ end
310
+ end
311
+ return model if model
312
+ end
313
+
299
314
  register
300
315
  end
301
316
  end
@@ -1,4 +1,7 @@
1
- require 'wukong/widget/processors'
2
- require 'wukong/widget/reducers'
3
- require 'wukong/widget/serializers'
4
1
  require 'wukong/widget/filters'
2
+ require 'wukong/widget/serializers'
3
+ require 'wukong/widget/operators'
4
+ require 'wukong/widget/reducers'
5
+ require 'wukong/widget/extract'
6
+ require 'wukong/widget/logger'
7
+ require 'wukong/widget/echo'
@@ -2,17 +2,80 @@ require 'spec_helper'
2
2
 
3
3
  describe Hanuman::Graph, :hanuman => true do
4
4
 
5
- # it 'makes a tree' do
6
- # example_graph.tree.should == {
7
- # :name => :pie,
8
- # :inputs => [:bake_pie],
9
- # :stages => [
10
- # {:name=>:make_pie, :inputs=>[:crust, :filling]},
11
- # {:name=>:bake_pie, :inputs=>[:make_pie]}
12
- # ],
13
- # }
14
- # end
5
+ include_context "graphs"
6
+
7
+ context "#ancestors" do
8
+ context "called without any arguments" do
9
+ it "returns all stages with an ancestor" do
10
+ graph.ancestors.size.should == 4
11
+ end
12
+ end
13
+
14
+ context "called with a stage as the first argument" do
15
+ it "returns the immediate ancestors of the stage" do
16
+ graph.ancestors(graph.stages[:second]).size.should == 2
17
+ end
18
+ end
19
+ end
15
20
 
21
+ context "#descendents" do
22
+ context "called without any arguments" do
23
+ it "returns all stages with a descendent" do
24
+ graph.descendents.size.should == 4
25
+ end
26
+ end
27
+
28
+ context "called with a stage as the first argument" do
29
+ it "returns the immediate descendents of the stage" do
30
+ graph.descendents(graph.stages[:second]).size.should == 2
31
+ end
32
+ end
33
+ end
34
+
35
+ context "#add_stage" do
36
+ let(:stage) { Hanuman::Stage.receive(label: :orphan) }
37
+ it "adds the stage to the graph" do
38
+ expect { graph.add_stage(stage) }.to change { graph.stages[:orphan] }.from(nil).to(stage)
39
+ end
40
+ it "doesn't create any links" do
41
+ expect { graph.add_stage(stage) }.to_not change { graph.links }
42
+ end
43
+ end
44
+
45
+ context "#add_link" do
46
+ context "when adding a new stage" do
47
+ let(:from) { graph.stages[:fourth] }
48
+ let(:into) { Hanuman::Stage.receive(label: :fifth) }
49
+ it "adds the new stage to the graph" do
50
+ expect { graph.add_link(:simple, from, into) }.to change { graph.stages[:fifth] }.from(nil).to(into)
51
+ end
52
+ it "adds the new link to the graph" do
53
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
54
+ end
55
+ end
56
+ context "when adding an existing link" do
57
+ let(:from) { graph.stages[:third_a] }
58
+ let(:into) { graph.stages[:fourth] }
59
+ it "duplicates the link in the graph" do
60
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
61
+ end
62
+ end
63
+ context "when making a cycle" do
64
+ let(:from) { graph.stages[:fourth] }
65
+ let(:into) { graph.stages[:first_a] }
66
+ it "adds the link in the graph" do
67
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
68
+ end
69
+ end
70
+ context "when linking a stage to itself" do
71
+ let(:from) { tree.stages[:fourth] }
72
+ let(:into) { tree.stages[:fourth] }
73
+ it "adds the link in the graph" do
74
+ expect { graph.add_link(:simple, from, into) }.to change { graph.links.size }.by(1)
75
+ end
76
+ end
77
+ end
78
+
16
79
  end
17
80
 
18
81
  describe Hanuman::GraphBuilder, :hanuman => true do
@@ -63,4 +63,19 @@ describe Hanuman::StageBuilder, :hanuman => true do
63
63
  subject.serialize.should_not include(:links)
64
64
  end
65
65
  end
66
+
67
+ context '#into' do
68
+ subject { described_class.receive(label: :pyro) }
69
+ let(:other_stage){ described_class.receive(label: :iceman) }
70
+
71
+ it 'returns the linked into stage for chaining' do
72
+ subject.into(other_stage).should be(other_stage)
73
+ end
74
+
75
+ it 'links two stages together with a simple link' do
76
+ subject.into(other_stage)
77
+ subject.links.should be_any{ |link| link.from == :pyro and link.into == :iceman }
78
+ end
79
+ end
80
+
66
81
  end
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hanuman::Tree do
4
+
5
+ include_context "graphs"
6
+
7
+ it "iterates over each stage in tree-order" do
8
+ tree.directed_sort.should == [:first, :second, :third_b, :third_a, :fourth]
9
+ end
10
+
11
+ context "#root" do
12
+ context "when called without arguments" do
13
+ it "returns the root of the whole tree" do
14
+ tree.root.should_not be_nil
15
+ tree.root.label.should == :first
16
+ end
17
+ it "returns the root of a sigle stage tree" do
18
+ single_stage_tree.root.should_not be_nil
19
+ single_stage_tree.root.label.should == :first
20
+ end
21
+
22
+ end
23
+ context "when called with a stage" do
24
+ it "should return the root of the whole tree" do
25
+ tree.root(tree.stages[:fourth]).should_not be_nil
26
+ tree.root(tree.stages[:fourth]).label.should == :first
27
+ end
28
+ end
29
+ end
30
+
31
+ context "#ancestor" do
32
+ it "returns the ancestor of a stage" do
33
+ tree.ancestor(tree.stages[:fourth]).should_not be_nil
34
+ tree.ancestor(tree.stages[:fourth]).label.should == :third_a
35
+ end
36
+ it "returns nil for the root" do
37
+ tree.ancestor(tree.root).should be_nil
38
+ end
39
+ end
40
+
41
+ context "#leaves" do
42
+ it "returns the leaf stages of a tree" do
43
+ tree.leaves.map(&:label).should include(:fourth, :third_b)
44
+ end
45
+
46
+ it "returns the root of a tree that has no other leaves" do
47
+ single_stage_tree.leaves.map(&:label).should include(:first)
48
+ end
49
+
50
+ end
51
+
52
+ context "#prepend" do
53
+ let(:zeroth) { Hanuman::Stage.receive(label: :zeroth) }
54
+ it "adds the given stage" do
55
+ expect { tree.prepend(zeroth) }.to change { tree.stages[:zeroth] }.from(nil).to(zeroth)
56
+ end
57
+ it "adds a link from the new stage to the old root" do
58
+ expect { tree.prepend(zeroth) }.to change { tree.has_link?(zeroth, tree.stages[:first]) }.from(false).to(true)
59
+ end
60
+ it "the root becomes the new stage" do
61
+ expect { tree.prepend(zeroth) }.to change { tree.root.label }.from(:first).to(:zeroth)
62
+ end
63
+ end
64
+
65
+ context "#append" do
66
+ let(:fifth) { Hanuman::Stage.receive(label: :fifth) }
67
+ it "adds a new stage for each leaf" do
68
+ expect { tree.append(fifth) }.to change { tree.stages.size }.by(2)
69
+ end
70
+ it "adds a link for each of the new stages" do
71
+ expect { tree.append(fifth) }.to change { tree.links.size }.by(2)
72
+ end
73
+ it "but doesn't change the number of leaves " do
74
+ expect { tree.append(fifth) }.to_not change { tree.leaves.size }
75
+ end
76
+ end
77
+
78
+ context "#add_link" do
79
+ context "when adding a new stage" do
80
+ let(:from) { tree.stages[:fourth] }
81
+ let(:into) { Hanuman::Stage.receive(label: :fifth) }
82
+ it "adds the new stage to the tree" do
83
+ expect { tree.add_link(:simple, from, into) }.to change { tree.stages[:fifth] }.from(nil).to(into)
84
+ end
85
+ it "adds the new link to the tree" do
86
+ expect { tree.add_link(:simple, from, into) }.to change { tree.links.size }.by(1)
87
+ end
88
+ end
89
+ context "when adding an existing link" do
90
+ let(:from) { tree.stages[:third_a] }
91
+ let(:into) { tree.stages[:fourth] }
92
+ it "doesn't duplicate the link in the tree" do
93
+ expect { tree.add_link(:simple, from, into) }.to_not change { tree.links.size }
94
+ end
95
+ end
96
+ context "when adding a link to a stage with an existing parent" do
97
+ let(:from) { Hanuman::Stage.receive(label: :fifth) }
98
+ let(:into) { tree.stages[:fourth] }
99
+ it "raises an error" do
100
+ expect { tree.add_link(:simple, from, into) }.to raise_error(Hanuman::Tree::MultipleRoots)
101
+ end
102
+ end
103
+ context "when making a cycle" do
104
+ let(:from) { tree.stages[:fourth] }
105
+ let(:into) { tree.stages[:first] }
106
+ it "raises an error" do
107
+ expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
108
+ end
109
+ end
110
+ context "when linking a stage to itself" do
111
+ let(:from) { tree.stages[:fourth] }
112
+ let(:into) { tree.stages[:fourth] }
113
+ it "raises an error" do
114
+ expect { tree.add_link(:simple, from, into) }.to raise_error(TSort::Cyclic)
115
+ end
116
+ end
117
+ end
118
+
119
+ end
@@ -1,5 +1,7 @@
1
1
  require 'wukong'
2
2
  require 'wukong/spec_helpers'
3
+ require 'wukong/source'
4
+ require_relative './support/shared_context_for_graphs'
3
5
  require_relative './support/shared_examples_for_shortcuts'
4
6
  require_relative './support/shared_examples_for_builders'
5
7
  require_relative './support/shared_context_for_reducers'
@@ -18,13 +20,23 @@ RSpec.configure do |config|
18
20
  end
19
21
 
20
22
  def generic_runner *args
21
- runner(Wukong::Runner, 'wu-generic', *args)
23
+ # the wu-generic program doesn't have to exist for this Runner to
24
+ # work if it's called from Ruby code
25
+ runner(Wukong::Runner, 'wu-generic', *args)
22
26
  end
23
27
 
24
28
  def wu_local *args
25
29
  command('wu-local', *args)
26
30
  end
27
31
 
32
+ def wu_source *args
33
+ command('wu-source', *args)
34
+ end
35
+
36
+ def wu *args
37
+ command('wu', *args)
38
+ end
39
+
28
40
  # FIXME Why is this here?
29
41
  config.treat_symbols_as_metadata_keys_with_true_values = true
30
42
 
@@ -28,7 +28,6 @@ RSpec.configure do |config|
28
28
  end
29
29
 
30
30
  shared_context 'wukong', :helpers => true do
31
-
32
31
  RSpec::Matchers.define(:be_in){|expected| match{|actual| expected.include?(actual) } }
33
32
 
34
33
  def self.be_ish_matcher(handle, regexp)