wukong 3.0.0.pre2 → 3.0.0.pre3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146)
  1. data/Gemfile +13 -0
  2. data/README.md +182 -6
  3. data/bin/wu-local +13 -5
  4. data/bin/wu-server +1 -1
  5. data/examples/Gemfile +2 -1
  6. data/examples/basic/string_reverser.rb +23 -0
  7. data/examples/{tiny_count.rb → basic/tiny_count.rb} +0 -0
  8. data/examples/{word_count → basic/word_count}/accumulator.rb +0 -0
  9. data/examples/{word_count → basic/word_count}/tokenizer.rb +0 -0
  10. data/examples/{word_count → basic/word_count}/word_count.rb +0 -0
  11. data/examples/deploy_pack/Gemfile +7 -0
  12. data/examples/deploy_pack/README.md +6 -0
  13. data/examples/{text/latinize_text.rb → deploy_pack/a/b/c/.gitkeep} +0 -0
  14. data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
  15. data/examples/deploy_pack/config/environment.rb +1 -0
  16. data/examples/{dataflow → dsl/dataflow}/fibonacci_series.rb +0 -0
  17. data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
  18. data/examples/{dataflow → dsl/dataflow}/simple.rb +0 -0
  19. data/examples/{dataflow → dsl/dataflow}/telegram.rb +0 -0
  20. data/examples/{workflow → dsl/workflow}/cherry_pie.dot +0 -0
  21. data/examples/{workflow → dsl/workflow}/cherry_pie.md +0 -0
  22. data/examples/{workflow → dsl/workflow}/cherry_pie.png +0 -0
  23. data/examples/{workflow → dsl/workflow}/cherry_pie.rb +0 -0
  24. data/examples/empty/.gitkeep +0 -0
  25. data/examples/graph/implied_geolocation/README.md +63 -0
  26. data/examples/graph/{minimum_spanning_tree.rb → minimum_spanning_tree/airfares_graphviz.rb} +0 -0
  27. data/examples/munging/airline_flights/indexable.rb +75 -0
  28. data/examples/munging/airline_flights/indexable_spec.rb +90 -0
  29. data/examples/munging/geo/geonames_models.rb +29 -0
  30. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +1 -0
  31. data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
  32. data/examples/munging/wikipedia/dbpedia/extract_links.rb +213 -146
  33. data/examples/rake_helper.rb +12 -0
  34. data/examples/ruby_project/Gemfile +7 -0
  35. data/examples/ruby_project/README.md +6 -0
  36. data/examples/ruby_project/a/b/c/.gitkeep +0 -0
  37. data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
  38. data/examples/serverlogs/models/logline.rb +102 -0
  39. data/examples/{dataflow/parse_apache_logs.rb → serverlogs/parser/apache_parser_widget.rb} +0 -0
  40. data/examples/serverlogs/visit_paths/common.rb +4 -0
  41. data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
  42. data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
  43. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
  44. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
  45. data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
  46. data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
  47. data/examples/text/{pig_latin.rb → pig_latin/pig_latinizer.rb} +0 -0
  48. data/examples/{dataflow/pig_latinizer.rb → text/pig_latin/pig_latinizer_widget.rb} +0 -0
  49. data/lib/hanuman/graph.rb +6 -1
  50. data/lib/wu/geo.rb +4 -0
  51. data/lib/wu/geo/geo_grids.numbers +0 -0
  52. data/lib/wu/geo/geolocated.rb +331 -0
  53. data/lib/wu/geo/quadtile.rb +69 -0
  54. data/{examples → lib/wu}/graph/union_find.rb +0 -0
  55. data/lib/wu/model/reconcilable.rb +63 -0
  56. data/{examples/munging/wikipedia/utils/munging_utils.rb → lib/wu/munging.rb} +7 -4
  57. data/lib/wu/social/models/twitter.rb +31 -0
  58. data/{examples/models/wikipedia.rb → lib/wu/wikipedia/models.rb} +0 -0
  59. data/lib/wukong.rb +9 -4
  60. data/lib/wukong/boot.rb +10 -1
  61. data/lib/wukong/driver.rb +65 -71
  62. data/lib/wukong/logger.rb +93 -0
  63. data/lib/wukong/processor.rb +38 -29
  64. data/lib/wukong/runner.rb +144 -0
  65. data/lib/wukong/server.rb +119 -0
  66. data/lib/wukong/spec_helpers.rb +1 -0
  67. data/lib/wukong/spec_helpers/integration_driver.rb +22 -9
  68. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +26 -4
  69. data/lib/wukong/spec_helpers/processor_helpers.rb +4 -10
  70. data/lib/wukong/spec_helpers/shared_examples.rb +12 -13
  71. data/lib/wukong/version.rb +1 -1
  72. data/lib/wukong/widget/processors.rb +13 -0
  73. data/lib/wukong/widget/serializers.rb +55 -65
  74. data/lib/wukong/widgets.rb +0 -2
  75. data/spec/hanuman/graph_spec.rb +14 -0
  76. data/spec/spec_helper.rb +4 -30
  77. data/spec/support/{wukong_test_helpers.rb → example_test_helpers.rb} +29 -2
  78. data/spec/support/integration_helper.rb +38 -0
  79. data/spec/support/model_test_helpers.rb +115 -0
  80. data/spec/wu/geo/geolocated_spec.rb +247 -0
  81. data/spec/wu/model/reconcilable_spec.rb +152 -0
  82. data/spec/wukong/widget/processors_spec.rb +0 -1
  83. data/spec/wukong/widget/serializers_spec.rb +88 -62
  84. data/spec/wukong/wu_local_spec.rb +125 -0
  85. data/wukong.gemspec +3 -16
  86. metadata +72 -266
  87. data/examples/dataflow/apache_log_line.rb +0 -100
  88. data/examples/jabberwocky.txt +0 -36
  89. data/examples/munging/Gemfile +0 -8
  90. data/examples/munging/airline_flights/airline.rb +0 -57
  91. data/examples/munging/airline_flights/airport.rb +0 -211
  92. data/examples/munging/airline_flights/flight.rb +0 -156
  93. data/examples/munging/airline_flights/models.rb +0 -4
  94. data/examples/munging/airline_flights/parse.rb +0 -26
  95. data/examples/munging/airline_flights/route.rb +0 -35
  96. data/examples/munging/airline_flights/timezone_fixup.rb +0 -62
  97. data/examples/munging/airports/40_wbans.txt +0 -40
  98. data/examples/munging/airports/filter_weather_reports.rb +0 -37
  99. data/examples/munging/airports/join.pig +0 -31
  100. data/examples/munging/airports/to_tsv.rb +0 -33
  101. data/examples/munging/airports/usa_wbans.pig +0 -19
  102. data/examples/munging/airports/usa_wbans.txt +0 -2157
  103. data/examples/munging/airports/wbans.pig +0 -19
  104. data/examples/munging/airports/wbans.txt +0 -2310
  105. data/examples/munging/rake_helper.rb +0 -62
  106. data/examples/munging/weather/.gitignore +0 -1
  107. data/examples/munging/weather/Gemfile +0 -4
  108. data/examples/munging/weather/Rakefile +0 -28
  109. data/examples/munging/weather/extract_ish.rb +0 -13
  110. data/examples/munging/weather/models/weather.rb +0 -119
  111. data/examples/munging/weather/utils/noaa_downloader.rb +0 -46
  112. data/examples/munging/wikipedia/README.md +0 -34
  113. data/examples/munging/wikipedia/Rakefile +0 -193
  114. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +0 -18
  115. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +0 -21
  116. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +0 -27
  117. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +0 -29
  118. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +0 -14
  119. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +0 -25
  120. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +0 -29
  121. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +0 -32
  122. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +0 -85
  123. data/examples/munging/wikipedia/pig_style_guide.md +0 -25
  124. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +0 -19
  125. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +0 -23
  126. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +0 -24
  127. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +0 -22
  128. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +0 -22
  129. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +0 -26
  130. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +0 -29
  131. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +0 -24
  132. data/examples/munging/wikipedia/utils/get_namespaces.rb +0 -86
  133. data/examples/munging/wikipedia/utils/namespaces.json +0 -1
  134. data/examples/string_reverser.rb +0 -26
  135. data/examples/twitter/locations.rb +0 -29
  136. data/examples/twitter/models.rb +0 -24
  137. data/examples/twitter/pt1-fiddle.pig +0 -8
  138. data/examples/twitter/pt2-simple_parse.pig +0 -31
  139. data/examples/twitter/pt2-simple_parse.rb +0 -18
  140. data/examples/twitter/pt3-join_on_zips.pig +0 -39
  141. data/examples/twitter/pt4-strong_links.rb +0 -20
  142. data/examples/twitter/pt5-lnglat_and_strong_links.pig +0 -16
  143. data/examples/twitter/states.tsv +0 -50
  144. data/examples/workflow/package_gem.rb +0 -55
  145. data/lib/wukong/widget/sink.rb +0 -16
  146. data/lib/wukong/widget/source.rb +0 -14
data/{examples/munging/wikipedia/utils/munging_utils.rb → lib/wu/munging.rb} CHANGED
@@ -1,7 +1,5 @@
1
1
  # encoding:UTF-8
2
2
 
3
- require 'multi_json'
4
-
5
3
  module MungingUtils
6
4
  extend self # you can call MungingUtils.foo, or include it and call on self.
7
5
 
@@ -37,14 +35,19 @@ module MungingUtils
37
35
  text
38
36
  end
39
37
 
38
+ # For follow-on escaping of XML-encoded text.
39
+ #
40
40
  # Modifies the text in place, replacing all non-keyboard characters (newline,
41
41
  # tab, anything not between ascii 0x20 and 0x7e) with their XML entity encoding
42
+ #
43
+ # NOTE: does NOT escape the ampersand character
44
+ # NOTE: modifies the text in-place
45
+ #
42
46
  def safe_xml_encode(text)
43
- text.gsub!(NON_PLAIN_ASCII_RE){|ch| "\\u%04x" % ch.ord } unless jsonized.ascii_only?
47
+ text.gsub!(NON_PLAIN_ASCII_RE){|ch| "&#%04x;" % ch.ord } unless jsonized.ascii_only?
44
48
  text
45
49
  end
46
50
 
47
-
48
51
  # Returns a JSON encoded string, with all non-ASCII characters escaped
49
52
  def safe_json_encode(string)
50
53
  jsonized = MultiJson.encode(string)
data/lib/wu/social/models/twitter.rb ADDED
@@ -0,0 +1,31 @@
1
+ module Wu
2
+ module Social
3
+ module Model
4
+
5
+ class TwitterUser
6
+ include Gorillib::Model
7
+
8
+ field :user_id, Integer
9
+ field :screen_name, String
10
+ field :created_at, Time
11
+ field :scraped_at, String
12
+ field :protected, Boolean
13
+ field :followers, Integer
14
+ field :friends, Integer
15
+ field :tweets_count, Integer
16
+ field :geo_enabled, :boolean
17
+ field :time_zone, String
18
+ field :utc_offset, String
19
+ field :lang, String
20
+ field :location, String
21
+ field :url, String
22
+
23
+ def receive_protected(val)
24
+ self.protected = (val.to_s == 'true' || val.to_s == '1')
25
+ end
26
+ def protected? ; !! self.protected ; end
27
+ end
28
+
29
+ end
30
+ end
31
+ end
data/lib/wukong.rb CHANGED
@@ -1,17 +1,19 @@
1
1
  require 'configliere'
2
- require 'gorillib/logger/log'
3
2
  require 'vayacondios-client'
4
3
  require 'multi_json'
4
+ require 'eventmachine'
5
+ require 'log4r'
5
6
 
6
7
  require 'hanuman'
7
8
 
9
+ require 'wukong/logger'
8
10
  require 'wukong/processor'
9
- require 'wukong/driver'
10
-
11
- require 'wukong/emitter'
12
11
  require 'wukong/dataflow'
13
12
  require 'wukong/configuration'
14
13
  require 'wukong/widgets'
14
+ require 'wukong/driver'
15
+ require 'wukong/server'
16
+ # require 'wukong/runner'
15
17
 
16
18
  # The Wukong module will contain all code for Wukong's core (like
17
19
  # Processors and Dataflows) as well as all plugins.
@@ -29,3 +31,6 @@ module Wukong
29
31
  add_shortcut_method_for(:dataflow, DataflowBuilder)
30
32
 
31
33
  end
34
+
35
+ # Alias module name for shorter namespaces
36
+ Wu = Wukong
data/lib/wukong/boot.rb CHANGED
@@ -58,11 +58,12 @@ module Wukong
58
58
  #
59
59
  # @return [String]
60
60
  def self.deploy_pack_dir
61
+ return File.dirname(ENV["BUNDLE_GEMFILE"]) if ENV["BUNDLE_GEMFILE"] && is_deploy_pack_dir?(File.dirname(ENV["BUNDLE_GEMFILE"]))
61
62
  return @deploy_pack_dir if @deploy_pack_dir
62
63
  wd = Dir.pwd
63
64
  parent = File.dirname(wd)
64
65
  until wd == parent
65
- return wd if File.exist?(File.join(wd, 'Gemfile')) && File.exist?(File.join(wd, 'config', 'environment.rb'))
66
+ return wd if is_deploy_pack_dir?(wd)
66
67
  wd = parent
67
68
  parent = File.dirname(wd)
68
69
  end
@@ -82,6 +83,14 @@ module Wukong
82
83
  def self.load_environment
83
84
  require environment_file
84
85
  end
86
+
87
+ # Could `dir` be a deploy pack dir?
88
+ #
89
+ # @param [String] dir
90
+ # @return [true, false]
91
+ def self.is_deploy_pack_dir? dir
92
+ dir && !dir.empty? && File.directory?(dir) && File.exist?(File.join(dir, 'Gemfile')) && File.exist?(File.join(dir, 'config', 'environment.rb'))
93
+ end
85
94
  end
86
95
 
87
96
  end
data/lib/wukong/driver.rb CHANGED
@@ -1,99 +1,93 @@
1
- module Wukong
2
- class Driver
1
+ module Wukong
2
+ module DriverMethods
3
3
 
4
- class << self
5
- def run(dataflow, options)
6
- new(dataflow, options).run!
7
- end
4
+ def driver
5
+ @driver ||= Driver.new(dataflow)
8
6
  end
9
7
 
10
8
  def lookup(label)
11
- builder = Wukong.registry.retrieve(label)
12
- builder.build(@settings)
9
+ raise Wukong::Error.new("could not find definition for #{label}") unless Wukong.registry.registered?(label.to_sym)
10
+ Wukong.registry.retrieve(label.to_sym)
13
11
  end
14
12
 
15
- def wire(flow)
16
- using = @settings[:wiring] || :emitter
17
- if flow.complete?
18
- flow.links.each do |link|
19
- Hanuman::LinkFactory.connect(using, flow.stages[link.from], flow.stages[link.into])
20
- end
21
- end
22
- flow
13
+ def lookup_and_build(label, options = {})
14
+ lookup(label).build(options)
23
15
  end
24
- end
25
-
26
- class LocalDriver < Driver
27
- attr_accessor :dataflow, :source
28
16
 
29
- def initialize(dataflow, options = {})
30
- builder = (Wukong.registry.retrieve(dataflow) or raise Error.new("No such processor or dataflow: #{dataflow.inspect}"))
31
- dataflow = builder.build(options)
32
- @dataflow = dataflow.respond_to?(:stages) ? dataflow.directed_sort.map{ |name| dataflow.stages[name] } : [ dataflow ]
33
- @dataflow << Wukong.registry.retrieve(:stdout).build
34
- @source = Wukong.registry.retrieve(:stdin).build
17
+ def build_serializer(direction, label, options)
18
+ lookup_and_build("#{direction}_#{label}", options)
35
19
  end
36
20
 
37
- def emitter
38
- @emitter ||= ProcEmitter.new(dataflow)
39
- end
40
-
41
- def stage_iterator(stage)
42
- return dataflow.first if stage.nil?
43
- position = dataflow.find_index(stage)
44
- dataflow[position + 1]
21
+ def add_serialization(dataflow, direction, label, options)
22
+ case direction
23
+ when :to then dataflow.push build_serializer(direction, label, options)
24
+ when :from then dataflow.unshift build_serializer(direction, label, options)
25
+ end
45
26
  end
46
27
 
47
- def next_record(&blk)
48
- trap('SIGINT'){ break }
49
- source.process(&blk)
28
+ def setup_dataflow
29
+ dataflow.each(&:setup)
50
30
  end
51
31
 
52
- def run!
53
- dataflow.each(&:setup)
54
- next_record do |record|
55
- emitter.send_through_dataflow(record)
56
- end
32
+ def finalize_and_stop_dataflow
57
33
  dataflow.each do |stage|
58
- stage.finalize(&emitter.advance(stage)) if stage.respond_to?(:finalize)
34
+ stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
59
35
  stage.stop
60
- end
61
- nil
36
+ end
62
37
  end
38
+
39
+ # So pretty...
40
+ def construct_dataflow(label, options)
41
+ dataflow = lookup_and_build(label, options)
42
+ dataflow = dataflow.respond_to?(:stages) ? dataflow.directed_sort.map{ |name| dataflow.stages[name] } : [ dataflow ]
43
+ expected_input_model = (options[:consumes].constantize rescue nil) || dataflow.first.expected_record_type(:consumes)
44
+ dataflow.unshift lookup_and_build(:recordize, model: expected_input_model) if expected_input_model
45
+ expected_output_model = (options[:produces].constantize rescue nil) || dataflow.first.expected_record_type(:produces)
46
+ dataflow.push lookup_and_build(:recordize, model: expected_output_model) if expected_output_model
47
+ expected_input_serialization = options[:from] || dataflow.last.expected_serialization(:from)
48
+ add_serialization(dataflow, :from, expected_input_serialization, options) if expected_input_serialization
49
+ expected_output_serialization = options[:to] || dataflow.last.expected_serialization(:to)
50
+ add_serialization(dataflow, :to, expected_output_serialization, options) if expected_output_serialization
51
+ dataflow.push self
52
+ end
63
53
  end
64
- end
65
54
 
66
- class ProcEmitter
55
+ class Driver
56
+ attr_accessor :dataflow
67
57
 
68
- attr_accessor :dataflow
58
+ def initialize(dataflow)
59
+ @dataflow = dataflow
60
+ end
69
61
 
70
- def initialize(dataflow)
71
- @dataflow = dataflow
72
- end
62
+ def to_proc
63
+ return @wiring if @wiring
64
+ @wiring = Proc.new do |stage, record|
65
+ stage.process(record, &advance(stage)) if stage
66
+ end
67
+ end
73
68
 
74
- def to_proc
75
- return @wiring if @wiring
76
- @wiring = Proc.new do |stage, record|
77
- stage.process(record, &advance(stage)) if stage
69
+ def send_through_dataflow(record)
70
+ start_with(dataflow.first).call(record)
71
+ end
72
+
73
+ def start_with(stage)
74
+ to_proc.curry.call(stage)
78
75
  end
79
- end
80
76
 
81
- def send_through_dataflow(record)
82
- self.call(dataflow.first, record)
83
- end
84
-
85
- def advance(stage)
86
- next_stage = stage_iterator(stage)
87
- self.to_proc.curry.call(next_stage)
88
- end
77
+ def advance(stage)
78
+ next_stage = stage_iterator(stage)
79
+ start_with(next_stage)
80
+ end
89
81
 
90
- def stage_iterator(stage)
91
- position = dataflow.find_index(stage)
92
- dataflow[position + 1]
93
- end
82
+ # This should properly be defined on dataflow/builder
83
+ def stage_iterator(stage)
84
+ position = dataflow.find_index(stage)
85
+ dataflow[position + 1]
86
+ end
94
87
 
95
- def call(*args)
96
- to_proc.call(*args)
88
+ def call(*args)
89
+ to_proc.call(*args)
90
+ end
91
+
97
92
  end
98
-
99
93
  end
data/lib/wukong/logger.rb ADDED
@@ -0,0 +1,93 @@
1
+ module Wukong
2
+ class LogFactory
3
+
4
+ attr_reader :created_log
5
+
6
+ def self.defaults
7
+ Log4r::StderrOutputter.new('console', formatter: Log4r::PatternFormatter.new(pattern: "%l %d [%-20c] -- %m"))
8
+ end
9
+
10
+ def self.configure(klass, options = {})
11
+ factory = new(klass, options)
12
+ factory.created_log
13
+ end
14
+
15
+ def initialize(logger, config)
16
+ @created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
17
+ outputter(LogFactory.defaults) unless ancestry_has_outputter?(@created_log)
18
+ apply_options(config)
19
+ end
20
+
21
+ def ancestry_has_outputter? lgr
22
+ if lgr.respond_to?(:outputters) && !lgr.outputters.empty?
23
+ true
24
+ elsif lgr.respond_to?(:parent) && !lgr.parent.nil?
25
+ ancestry_has_outputter? lgr.parent
26
+ else
27
+ false
28
+ end
29
+ end
30
+
31
+ def apply_options config
32
+ config.each_pair do |option, value|
33
+ begin
34
+ send(option, value)
35
+ rescue
36
+ raise "invalid log option"
37
+ end
38
+ end
39
+ end
40
+
41
+ def outputter outptr
42
+ created_log.outputters = outptr
43
+ end
44
+
45
+ def level lvl
46
+ created_log.level = lookup_level(lvl.to_sym)
47
+ end
48
+
49
+ def lookup_level lvl
50
+ {
51
+ debug: Log4r::DEBUG,
52
+ info: Log4r::INFO,
53
+ warn: Log4r::WARN
54
+ }.fetch(lvl){ raise "invalid log level" }
55
+ end
56
+
57
+ def pattern ptrn
58
+ created_log.outputters.each do |output|
59
+ keep_date_format = output.formatter.date_pattern
60
+ output.formatter = Log4r::PatternFormatter.new(pattern: ptrn, date_format: keep_date_format)
61
+ end
62
+ end
63
+
64
+ def date_format fmt
65
+ created_log.outputters.each do |output|
66
+ keep_pattern = output.formatter.pattern
67
+ output.formatter = Log4r::PatternFormatter.new(pattern: keep_pattern, date_format: fmt)
68
+ end
69
+ end
70
+ end
71
+
72
+ # Mixin for logging behavior
73
+ module Logging
74
+
75
+ def self.included klass
76
+ if klass.ancestors.include?(Gorillib::Model)
77
+ klass.class_eval do
78
+ field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
79
+
80
+ def receive_log params
81
+ @log = LogFactory.configure(self.class, params)
82
+ end
83
+ end
84
+ else
85
+ klass.class_attribute :log
86
+ klass.log = LogFactory.configure(klass)
87
+ end
88
+ end
89
+ end
90
+
91
+ # Default log. Parent to all logs created in Wukong namespace
92
+ Log = LogFactory.configure(Wukong)
93
+ end
data/lib/wukong/processor.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'log4r'
2
-
3
1
  module Wukong
4
2
  class ProcessorBuilder < Hanuman::StageBuilder
5
3
  def namespace(*args)
@@ -14,25 +12,51 @@ module Wukong
14
12
  # A Processor can be written and tested purely in Ruby and on your
15
13
  # local machine. You can glue processors together
16
14
  class Processor < Hanuman::Stage
15
+ include Logging
16
+ include Vayacondios::Notifications
17
17
 
18
18
  field :action, Whatever
19
- field :log, Whatever, :default => -> { log = Log4r::Logger.new(self.class.to_s) ; log.outputters = Log4r::StdoutOutputter.new('stdout', formatter: Log4r::PatternFormatter.new(pattern: "%d [%l] %c: %m")) ; log }
20
- field :notifier, Vayacondios::NotifierFactory, :default => Vayacondios.default_notifier
21
19
 
22
- def self.describe desc
23
- @description = desc
24
- end
20
+ class << self
25
21
 
26
- def self.description
27
- @description
28
- end
22
+ def describe desc
23
+ @description = desc
24
+ end
25
+
26
+ def description
27
+ @description
28
+ end
29
+
30
+ def consumes(*args)
31
+ options = args.extract_options!
32
+ @consumes = options[:as]
33
+ validate_and_set_serialization(:from, args.first)
34
+ end
29
35
 
30
- def self.consumes label
31
- end
36
+ def produces(*args)
37
+ options = args.extract_options!
38
+ @produces = options[:as]
39
+ validate_and_set_serialization(:to, args.first)
40
+ end
41
+
42
+ def valid_serializer? label
43
+ label
44
+ end
32
45
 
33
- def self.produces label
34
- end
46
+ def validate_and_set_serialization(direction, label)
47
+ instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
48
+ end
35
49
 
50
+ end
51
+
52
+ def expected_record_type(type)
53
+ self.class.instance_variable_get("@#{type}")
54
+ end
55
+
56
+ def expected_serialization(direction)
57
+ self.class.instance_variable_get("@serialization_#{direction.to_s}")
58
+ end
59
+
36
60
  # This is a placeholder method intended to be overridden
37
61
  def perform_action(*args) ; end
38
62
 
@@ -41,21 +65,6 @@ module Wukong
41
65
  self.define_singleton_method(:perform_action, &action)
42
66
  end
43
67
 
44
- # Valid notifier types are currently :http or :log
45
- # This processor's log is passed to vayacondios
46
- def receive_notifier(type)
47
- if type.is_a?(Hash)
48
- @notifier = Vayacondios::NotifierFactory.receive({type: 'log'}.merge(type))
49
- else
50
- @notifier = Vayacondios::NotifierFactory.receive(type: type, log: log)
51
- end
52
- end
53
-
54
- # Send information to Vayacondios; data goes in, the right thing happens
55
- def notify(topic, cargo)
56
- notifier.notify(topic, cargo)
57
- end
58
-
59
68
  # This method is called after the processor class has been instantiated
60
69
  # but before any records are given to it to process
61
70
  def setup