wukong 3.0.0.pre2 → 3.0.0.pre3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +13 -0
- data/README.md +182 -6
- data/bin/wu-local +13 -5
- data/bin/wu-server +1 -1
- data/examples/Gemfile +2 -1
- data/examples/basic/string_reverser.rb +23 -0
- data/examples/{tiny_count.rb → basic/tiny_count.rb} +0 -0
- data/examples/{word_count → basic/word_count}/accumulator.rb +0 -0
- data/examples/{word_count → basic/word_count}/tokenizer.rb +0 -0
- data/examples/{word_count → basic/word_count}/word_count.rb +0 -0
- data/examples/deploy_pack/Gemfile +7 -0
- data/examples/deploy_pack/README.md +6 -0
- data/examples/{text/latinize_text.rb → deploy_pack/a/b/c/.gitkeep} +0 -0
- data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
- data/examples/deploy_pack/config/environment.rb +1 -0
- data/examples/{dataflow → dsl/dataflow}/fibonacci_series.rb +0 -0
- data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
- data/examples/{dataflow → dsl/dataflow}/simple.rb +0 -0
- data/examples/{dataflow → dsl/dataflow}/telegram.rb +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.dot +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.md +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.png +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.rb +0 -0
- data/examples/empty/.gitkeep +0 -0
- data/examples/graph/implied_geolocation/README.md +63 -0
- data/examples/graph/{minimum_spanning_tree.rb → minimum_spanning_tree/airfares_graphviz.rb} +0 -0
- data/examples/munging/airline_flights/indexable.rb +75 -0
- data/examples/munging/airline_flights/indexable_spec.rb +90 -0
- data/examples/munging/geo/geonames_models.rb +29 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +1 -0
- data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +213 -146
- data/examples/rake_helper.rb +12 -0
- data/examples/ruby_project/Gemfile +7 -0
- data/examples/ruby_project/README.md +6 -0
- data/examples/ruby_project/a/b/c/.gitkeep +0 -0
- data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/serverlogs/models/logline.rb +102 -0
- data/examples/{dataflow/parse_apache_logs.rb → serverlogs/parser/apache_parser_widget.rb} +0 -0
- data/examples/serverlogs/visit_paths/common.rb +4 -0
- data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
- data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
- data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
- data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
- data/examples/text/{pig_latin.rb → pig_latin/pig_latinizer.rb} +0 -0
- data/examples/{dataflow/pig_latinizer.rb → text/pig_latin/pig_latinizer_widget.rb} +0 -0
- data/lib/hanuman/graph.rb +6 -1
- data/lib/wu/geo.rb +4 -0
- data/lib/wu/geo/geo_grids.numbers +0 -0
- data/lib/wu/geo/geolocated.rb +331 -0
- data/lib/wu/geo/quadtile.rb +69 -0
- data/{examples → lib/wu}/graph/union_find.rb +0 -0
- data/lib/wu/model/reconcilable.rb +63 -0
- data/{examples/munging/wikipedia/utils/munging_utils.rb → lib/wu/munging.rb} +7 -4
- data/lib/wu/social/models/twitter.rb +31 -0
- data/{examples/models/wikipedia.rb → lib/wu/wikipedia/models.rb} +0 -0
- data/lib/wukong.rb +9 -4
- data/lib/wukong/boot.rb +10 -1
- data/lib/wukong/driver.rb +65 -71
- data/lib/wukong/logger.rb +93 -0
- data/lib/wukong/processor.rb +38 -29
- data/lib/wukong/runner.rb +144 -0
- data/lib/wukong/server.rb +119 -0
- data/lib/wukong/spec_helpers.rb +1 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +22 -9
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +26 -4
- data/lib/wukong/spec_helpers/processor_helpers.rb +4 -10
- data/lib/wukong/spec_helpers/shared_examples.rb +12 -13
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/processors.rb +13 -0
- data/lib/wukong/widget/serializers.rb +55 -65
- data/lib/wukong/widgets.rb +0 -2
- data/spec/hanuman/graph_spec.rb +14 -0
- data/spec/spec_helper.rb +4 -30
- data/spec/support/{wukong_test_helpers.rb → example_test_helpers.rb} +29 -2
- data/spec/support/integration_helper.rb +38 -0
- data/spec/support/model_test_helpers.rb +115 -0
- data/spec/wu/geo/geolocated_spec.rb +247 -0
- data/spec/wu/model/reconcilable_spec.rb +152 -0
- data/spec/wukong/widget/processors_spec.rb +0 -1
- data/spec/wukong/widget/serializers_spec.rb +88 -62
- data/spec/wukong/wu_local_spec.rb +125 -0
- data/wukong.gemspec +3 -16
- metadata +72 -266
- data/examples/dataflow/apache_log_line.rb +0 -100
- data/examples/jabberwocky.txt +0 -36
- data/examples/munging/Gemfile +0 -8
- data/examples/munging/airline_flights/airline.rb +0 -57
- data/examples/munging/airline_flights/airport.rb +0 -211
- data/examples/munging/airline_flights/flight.rb +0 -156
- data/examples/munging/airline_flights/models.rb +0 -4
- data/examples/munging/airline_flights/parse.rb +0 -26
- data/examples/munging/airline_flights/route.rb +0 -35
- data/examples/munging/airline_flights/timezone_fixup.rb +0 -62
- data/examples/munging/airports/40_wbans.txt +0 -40
- data/examples/munging/airports/filter_weather_reports.rb +0 -37
- data/examples/munging/airports/join.pig +0 -31
- data/examples/munging/airports/to_tsv.rb +0 -33
- data/examples/munging/airports/usa_wbans.pig +0 -19
- data/examples/munging/airports/usa_wbans.txt +0 -2157
- data/examples/munging/airports/wbans.pig +0 -19
- data/examples/munging/airports/wbans.txt +0 -2310
- data/examples/munging/rake_helper.rb +0 -62
- data/examples/munging/weather/.gitignore +0 -1
- data/examples/munging/weather/Gemfile +0 -4
- data/examples/munging/weather/Rakefile +0 -28
- data/examples/munging/weather/extract_ish.rb +0 -13
- data/examples/munging/weather/models/weather.rb +0 -119
- data/examples/munging/weather/utils/noaa_downloader.rb +0 -46
- data/examples/munging/wikipedia/README.md +0 -34
- data/examples/munging/wikipedia/Rakefile +0 -193
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +0 -18
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +0 -21
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +0 -27
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +0 -29
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +0 -14
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +0 -25
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +0 -29
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +0 -32
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +0 -85
- data/examples/munging/wikipedia/pig_style_guide.md +0 -25
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +0 -19
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +0 -23
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +0 -24
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +0 -22
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +0 -22
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +0 -26
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +0 -29
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +0 -24
- data/examples/munging/wikipedia/utils/get_namespaces.rb +0 -86
- data/examples/munging/wikipedia/utils/namespaces.json +0 -1
- data/examples/string_reverser.rb +0 -26
- data/examples/twitter/locations.rb +0 -29
- data/examples/twitter/models.rb +0 -24
- data/examples/twitter/pt1-fiddle.pig +0 -8
- data/examples/twitter/pt2-simple_parse.pig +0 -31
- data/examples/twitter/pt2-simple_parse.rb +0 -18
- data/examples/twitter/pt3-join_on_zips.pig +0 -39
- data/examples/twitter/pt4-strong_links.rb +0 -20
- data/examples/twitter/pt5-lnglat_and_strong_links.pig +0 -16
- data/examples/twitter/states.tsv +0 -50
- data/examples/workflow/package_gem.rb +0 -55
- data/lib/wukong/widget/sink.rb +0 -16
- data/lib/wukong/widget/source.rb +0 -14
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding:UTF-8
|
2
2
|
|
3
|
-
require 'multi_json'
|
4
|
-
|
5
3
|
module MungingUtils
|
6
4
|
extend self # you can call MungingUtils.foo, or include it and call on self.
|
7
5
|
|
@@ -37,14 +35,19 @@ module MungingUtils
|
|
37
35
|
text
|
38
36
|
end
|
39
37
|
|
38
|
+
# For follow-on escaping of XML-encoded text.
|
39
|
+
#
|
40
40
|
# Modifies the text in place, replacing all non-keyboard characters (newline,
|
41
41
|
# tab, anything not between ascii 0x20 and 0x7e) with their XML entity encoding
|
42
|
+
#
|
43
|
+
# NOTE: does NOT escape the ampersand character
|
44
|
+
# NOTE: modifies the text in-place
|
45
|
+
#
|
42
46
|
def safe_xml_encode(text)
|
43
|
-
text.gsub!(NON_PLAIN_ASCII_RE){|ch| "
|
47
|
+
text.gsub!(NON_PLAIN_ASCII_RE){|ch| "&#%04x;" % ch.ord } unless jsonized.ascii_only?
|
44
48
|
text
|
45
49
|
end
|
46
50
|
|
47
|
-
|
48
51
|
# Returns a JSON encoded string, with all non-ASCII characters escaped
|
49
52
|
def safe_json_encode(string)
|
50
53
|
jsonized = MultiJson.encode(string)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Wu
|
2
|
+
module Social
|
3
|
+
module Model
|
4
|
+
|
5
|
+
class TwitterUser
|
6
|
+
include Gorillib::Model
|
7
|
+
|
8
|
+
field :user_id, Integer
|
9
|
+
field :screen_name, String
|
10
|
+
field :created_at, Time
|
11
|
+
field :scraped_at, String
|
12
|
+
field :protected, Boolean
|
13
|
+
field :followers, Integer
|
14
|
+
field :friends, Integer
|
15
|
+
field :tweets_count, Integer
|
16
|
+
field :geo_enabled, :boolean
|
17
|
+
field :time_zone, String
|
18
|
+
field :utc_offset, String
|
19
|
+
field :lang, String
|
20
|
+
field :location, String
|
21
|
+
field :url, String
|
22
|
+
|
23
|
+
def receive_protected(val)
|
24
|
+
self.protected = (val.to_s == 'true' || val.to_s == '1')
|
25
|
+
end
|
26
|
+
def protected? ; !! self.protected ; end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
File without changes
|
data/lib/wukong.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
require 'configliere'
|
2
|
-
require 'gorillib/logger/log'
|
3
2
|
require 'vayacondios-client'
|
4
3
|
require 'multi_json'
|
4
|
+
require 'eventmachine'
|
5
|
+
require 'log4r'
|
5
6
|
|
6
7
|
require 'hanuman'
|
7
8
|
|
9
|
+
require 'wukong/logger'
|
8
10
|
require 'wukong/processor'
|
9
|
-
require 'wukong/driver'
|
10
|
-
|
11
|
-
require 'wukong/emitter'
|
12
11
|
require 'wukong/dataflow'
|
13
12
|
require 'wukong/configuration'
|
14
13
|
require 'wukong/widgets'
|
14
|
+
require 'wukong/driver'
|
15
|
+
require 'wukong/server'
|
16
|
+
# require 'wukong/runner'
|
15
17
|
|
16
18
|
# The Wukong module will contain all code for Wukong's core (like
|
17
19
|
# Processors and Dataflows) as well as all plugins.
|
@@ -29,3 +31,6 @@ module Wukong
|
|
29
31
|
add_shortcut_method_for(:dataflow, DataflowBuilder)
|
30
32
|
|
31
33
|
end
|
34
|
+
|
35
|
+
# Alias module name for shorter namespaces
|
36
|
+
Wu = Wukong
|
data/lib/wukong/boot.rb
CHANGED
@@ -58,11 +58,12 @@ module Wukong
|
|
58
58
|
#
|
59
59
|
# @return [String]
|
60
60
|
def self.deploy_pack_dir
|
61
|
+
return File.dirname(ENV["BUNDLE_GEMFILE"]) if ENV["BUNDLE_GEMFILE"] && is_deploy_pack_dir?(File.dirname(ENV["BUNDLE_GEMFILE"]))
|
61
62
|
return @deploy_pack_dir if @deploy_pack_dir
|
62
63
|
wd = Dir.pwd
|
63
64
|
parent = File.dirname(wd)
|
64
65
|
until wd == parent
|
65
|
-
return wd if
|
66
|
+
return wd if is_deploy_pack_dir?(wd)
|
66
67
|
wd = parent
|
67
68
|
parent = File.dirname(wd)
|
68
69
|
end
|
@@ -82,6 +83,14 @@ module Wukong
|
|
82
83
|
def self.load_environment
|
83
84
|
require environment_file
|
84
85
|
end
|
86
|
+
|
87
|
+
# Could `dir` be a deploy pack dir?
|
88
|
+
#
|
89
|
+
# @param [String] dir
|
90
|
+
# @return [true, false]
|
91
|
+
def self.is_deploy_pack_dir? dir
|
92
|
+
dir && !dir.empty? && File.directory?(dir) && File.exist?(File.join(dir, 'Gemfile')) && File.exist?(File.join(dir, 'config', 'environment.rb'))
|
93
|
+
end
|
85
94
|
end
|
86
95
|
|
87
96
|
end
|
data/lib/wukong/driver.rb
CHANGED
@@ -1,99 +1,93 @@
|
|
1
|
-
module Wukong
|
2
|
-
|
1
|
+
module Wukong
|
2
|
+
module DriverMethods
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
new(dataflow, options).run!
|
7
|
-
end
|
4
|
+
def driver
|
5
|
+
@driver ||= Driver.new(dataflow)
|
8
6
|
end
|
9
7
|
|
10
8
|
def lookup(label)
|
11
|
-
|
12
|
-
|
9
|
+
raise Wukong::Error.new("could not find definition for #{label}") unless Wukong.registry.registered?(label.to_sym)
|
10
|
+
Wukong.registry.retrieve(label.to_sym)
|
13
11
|
end
|
14
12
|
|
15
|
-
def
|
16
|
-
|
17
|
-
if flow.complete?
|
18
|
-
flow.links.each do |link|
|
19
|
-
Hanuman::LinkFactory.connect(using, flow.stages[link.from], flow.stages[link.into])
|
20
|
-
end
|
21
|
-
end
|
22
|
-
flow
|
13
|
+
def lookup_and_build(label, options = {})
|
14
|
+
lookup(label).build(options)
|
23
15
|
end
|
24
|
-
end
|
25
|
-
|
26
|
-
class LocalDriver < Driver
|
27
|
-
attr_accessor :dataflow, :source
|
28
16
|
|
29
|
-
def
|
30
|
-
|
31
|
-
dataflow = builder.build(options)
|
32
|
-
@dataflow = dataflow.respond_to?(:stages) ? dataflow.directed_sort.map{ |name| dataflow.stages[name] } : [ dataflow ]
|
33
|
-
@dataflow << Wukong.registry.retrieve(:stdout).build
|
34
|
-
@source = Wukong.registry.retrieve(:stdin).build
|
17
|
+
def build_serializer(direction, label, options)
|
18
|
+
lookup_and_build("#{direction}_#{label}", options)
|
35
19
|
end
|
36
20
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
return dataflow.first if stage.nil?
|
43
|
-
position = dataflow.find_index(stage)
|
44
|
-
dataflow[position + 1]
|
21
|
+
def add_serialization(dataflow, direction, label, options)
|
22
|
+
case direction
|
23
|
+
when :to then dataflow.push build_serializer(direction, label, options)
|
24
|
+
when :from then dataflow.unshift build_serializer(direction, label, options)
|
25
|
+
end
|
45
26
|
end
|
46
27
|
|
47
|
-
def
|
48
|
-
|
49
|
-
source.process(&blk)
|
28
|
+
def setup_dataflow
|
29
|
+
dataflow.each(&:setup)
|
50
30
|
end
|
51
31
|
|
52
|
-
def
|
53
|
-
dataflow.each(&:setup)
|
54
|
-
next_record do |record|
|
55
|
-
emitter.send_through_dataflow(record)
|
56
|
-
end
|
32
|
+
def finalize_and_stop_dataflow
|
57
33
|
dataflow.each do |stage|
|
58
|
-
stage.finalize(&
|
34
|
+
stage.finalize(&driver.advance(stage)) if stage.respond_to?(:finalize)
|
59
35
|
stage.stop
|
60
|
-
end
|
61
|
-
nil
|
36
|
+
end
|
62
37
|
end
|
38
|
+
|
39
|
+
# So pretty...
|
40
|
+
def construct_dataflow(label, options)
|
41
|
+
dataflow = lookup_and_build(label, options)
|
42
|
+
dataflow = dataflow.respond_to?(:stages) ? dataflow.directed_sort.map{ |name| dataflow.stages[name] } : [ dataflow ]
|
43
|
+
expected_input_model = (options[:consumes].constantize rescue nil) || dataflow.first.expected_record_type(:consumes)
|
44
|
+
dataflow.unshift lookup_and_build(:recordize, model: expected_input_model) if expected_input_model
|
45
|
+
expected_output_model = (options[:produces].constantize rescue nil) || dataflow.first.expected_record_type(:produces)
|
46
|
+
dataflow.push lookup_and_build(:recordize, model: expected_output_model) if expected_output_model
|
47
|
+
expected_input_serialization = options[:from] || dataflow.last.expected_serialization(:from)
|
48
|
+
add_serialization(dataflow, :from, expected_input_serialization, options) if expected_input_serialization
|
49
|
+
expected_output_serialization = options[:to] || dataflow.last.expected_serialization(:to)
|
50
|
+
add_serialization(dataflow, :to, expected_output_serialization, options) if expected_output_serialization
|
51
|
+
dataflow.push self
|
52
|
+
end
|
63
53
|
end
|
64
|
-
end
|
65
54
|
|
66
|
-
class
|
55
|
+
class Driver
|
56
|
+
attr_accessor :dataflow
|
67
57
|
|
68
|
-
|
58
|
+
def initialize(dataflow)
|
59
|
+
@dataflow = dataflow
|
60
|
+
end
|
69
61
|
|
70
|
-
|
71
|
-
|
72
|
-
|
62
|
+
def to_proc
|
63
|
+
return @wiring if @wiring
|
64
|
+
@wiring = Proc.new do |stage, record|
|
65
|
+
stage.process(record, &advance(stage)) if stage
|
66
|
+
end
|
67
|
+
end
|
73
68
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
69
|
+
def send_through_dataflow(record)
|
70
|
+
start_with(dataflow.first).call(record)
|
71
|
+
end
|
72
|
+
|
73
|
+
def start_with(stage)
|
74
|
+
to_proc.curry.call(stage)
|
78
75
|
end
|
79
|
-
end
|
80
76
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
def advance(stage)
|
86
|
-
next_stage = stage_iterator(stage)
|
87
|
-
self.to_proc.curry.call(next_stage)
|
88
|
-
end
|
77
|
+
def advance(stage)
|
78
|
+
next_stage = stage_iterator(stage)
|
79
|
+
start_with(next_stage)
|
80
|
+
end
|
89
81
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
82
|
+
# This should properly be defined on dataflow/builder
|
83
|
+
def stage_iterator(stage)
|
84
|
+
position = dataflow.find_index(stage)
|
85
|
+
dataflow[position + 1]
|
86
|
+
end
|
94
87
|
|
95
|
-
|
96
|
-
|
88
|
+
def call(*args)
|
89
|
+
to_proc.call(*args)
|
90
|
+
end
|
91
|
+
|
97
92
|
end
|
98
|
-
|
99
93
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module Wukong
|
2
|
+
class LogFactory
|
3
|
+
|
4
|
+
attr_reader :created_log
|
5
|
+
|
6
|
+
def self.defaults
|
7
|
+
Log4r::StderrOutputter.new('console', formatter: Log4r::PatternFormatter.new(pattern: "%l %d [%-20c] -- %m"))
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.configure(klass, options = {})
|
11
|
+
factory = new(klass, options)
|
12
|
+
factory.created_log
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(logger, config)
|
16
|
+
@created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
|
17
|
+
outputter(LogFactory.defaults) unless ancestry_has_outputter?(@created_log)
|
18
|
+
apply_options(config)
|
19
|
+
end
|
20
|
+
|
21
|
+
def ancestry_has_outputter? lgr
|
22
|
+
if lgr.respond_to?(:outputters) && !lgr.outputters.empty?
|
23
|
+
true
|
24
|
+
elsif lgr.respond_to?(:parent) && !lgr.parent.nil?
|
25
|
+
ancestry_has_outputter? lgr.parent
|
26
|
+
else
|
27
|
+
false
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def apply_options config
|
32
|
+
config.each_pair do |option, value|
|
33
|
+
begin
|
34
|
+
send(option, value)
|
35
|
+
rescue
|
36
|
+
raise "invalid log option"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def outputter outptr
|
42
|
+
created_log.outputters = outptr
|
43
|
+
end
|
44
|
+
|
45
|
+
def level lvl
|
46
|
+
created_log.level = lookup_level(lvl.to_sym)
|
47
|
+
end
|
48
|
+
|
49
|
+
def lookup_level lvl
|
50
|
+
{
|
51
|
+
debug: Log4r::DEBUG,
|
52
|
+
info: Log4r::INFO,
|
53
|
+
warn: Log4r::WARN
|
54
|
+
}.fetch(lvl){ raise "invalid log level" }
|
55
|
+
end
|
56
|
+
|
57
|
+
def pattern ptrn
|
58
|
+
created_log.outputters.each do |output|
|
59
|
+
keep_date_format = output.formatter.date_pattern
|
60
|
+
output.formatter = Log4r::PatternFormatter.new(pattern: ptrn, date_format: keep_date_format)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def date_format fmt
|
65
|
+
created_log.outputters.each do |output|
|
66
|
+
keep_pattern = output.formatter.pattern
|
67
|
+
output.formatter = Log4r::PatternFormatter.new(pattern: keep_pattern, date_format: fmt)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# Mixin for logging behavior
|
73
|
+
module Logging
|
74
|
+
|
75
|
+
def self.included klass
|
76
|
+
if klass.ancestors.include?(Gorillib::Model)
|
77
|
+
klass.class_eval do
|
78
|
+
field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
|
79
|
+
|
80
|
+
def receive_log params
|
81
|
+
@log = LogFactory.configure(self.class, params)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
else
|
85
|
+
klass.class_attribute :log
|
86
|
+
klass.log = LogFactory.configure(klass)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# Default log. Parent to all logs created in Wukong namespace
|
92
|
+
Log = LogFactory.configure(Wukong)
|
93
|
+
end
|
data/lib/wukong/processor.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'log4r'
|
2
|
-
|
3
1
|
module Wukong
|
4
2
|
class ProcessorBuilder < Hanuman::StageBuilder
|
5
3
|
def namespace(*args)
|
@@ -14,25 +12,51 @@ module Wukong
|
|
14
12
|
# A Processor can be written and tested purely in Ruby and on your
|
15
13
|
# local machine. You can glue processors together
|
16
14
|
class Processor < Hanuman::Stage
|
15
|
+
include Logging
|
16
|
+
include Vayacondios::Notifications
|
17
17
|
|
18
18
|
field :action, Whatever
|
19
|
-
field :log, Whatever, :default => -> { log = Log4r::Logger.new(self.class.to_s) ; log.outputters = Log4r::StdoutOutputter.new('stdout', formatter: Log4r::PatternFormatter.new(pattern: "%d [%l] %c: %m")) ; log }
|
20
|
-
field :notifier, Vayacondios::NotifierFactory, :default => Vayacondios.default_notifier
|
21
19
|
|
22
|
-
|
23
|
-
@description = desc
|
24
|
-
end
|
20
|
+
class << self
|
25
21
|
|
26
|
-
|
27
|
-
|
28
|
-
|
22
|
+
def describe desc
|
23
|
+
@description = desc
|
24
|
+
end
|
25
|
+
|
26
|
+
def description
|
27
|
+
@description
|
28
|
+
end
|
29
|
+
|
30
|
+
def consumes(*args)
|
31
|
+
options = args.extract_options!
|
32
|
+
@consumes = options[:as]
|
33
|
+
validate_and_set_serialization(:from, args.first)
|
34
|
+
end
|
29
35
|
|
30
|
-
|
31
|
-
|
36
|
+
def produces(*args)
|
37
|
+
options = args.extract_options!
|
38
|
+
@produces = options[:as]
|
39
|
+
validate_and_set_serialization(:to, args.first)
|
40
|
+
end
|
41
|
+
|
42
|
+
def valid_serializer? label
|
43
|
+
label
|
44
|
+
end
|
32
45
|
|
33
|
-
|
34
|
-
|
46
|
+
def validate_and_set_serialization(direction, label)
|
47
|
+
instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
|
48
|
+
end
|
35
49
|
|
50
|
+
end
|
51
|
+
|
52
|
+
def expected_record_type(type)
|
53
|
+
self.class.instance_variable_get("@#{type}")
|
54
|
+
end
|
55
|
+
|
56
|
+
def expected_serialization(direction)
|
57
|
+
self.class.instance_variable_get("@serialization_#{direction.to_s}")
|
58
|
+
end
|
59
|
+
|
36
60
|
# This is a placeholder method intended to be overridden
|
37
61
|
def perform_action(*args) ; end
|
38
62
|
|
@@ -41,21 +65,6 @@ module Wukong
|
|
41
65
|
self.define_singleton_method(:perform_action, &action)
|
42
66
|
end
|
43
67
|
|
44
|
-
# Valid notifier types are currently :http or :log
|
45
|
-
# This processor's log is passed to vayacondios
|
46
|
-
def receive_notifier(type)
|
47
|
-
if type.is_a?(Hash)
|
48
|
-
@notifier = Vayacondios::NotifierFactory.receive({type: 'log'}.merge(type))
|
49
|
-
else
|
50
|
-
@notifier = Vayacondios::NotifierFactory.receive(type: type, log: log)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Send information to Vayacondios; data goes in, the right thing happens
|
55
|
-
def notify(topic, cargo)
|
56
|
-
notifier.notify(topic, cargo)
|
57
|
-
end
|
58
|
-
|
59
68
|
# This method is called after the processor class has been instantiated
|
60
69
|
# but before any records are given to it to process
|
61
70
|
def setup
|