wukong 2.0.2 → 3.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +46 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/.yardopts +19 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +3 -0
- data/Guardfile +14 -0
- data/NOTES-travis.md +31 -0
- data/README.md +422 -0
- data/Rakefile +12 -0
- data/TODO.md +99 -0
- data/VERSION +1 -0
- data/bin/hdp-cp +0 -0
- data/bin/wu-flow +10 -0
- data/bin/wu-map +17 -0
- data/bin/wu-red +17 -0
- data/bin/wukong +17 -0
- data/data/CREDITS.md +355 -0
- data/data/graph/airfares.tsv +2174 -0
- data/data/text/gift_of_the_magi.txt +225 -0
- data/data/text/jabberwocky.txt +36 -0
- data/data/text/rectification_of_names.txt +33 -0
- data/{spec/data → data/twitter}/a_atsigns_b.tsv +0 -0
- data/{spec/data → data/twitter}/a_follows_b.tsv +0 -0
- data/{spec/data → data/twitter}/tweet.tsv +0 -0
- data/{spec/data → data/twitter}/twitter_user.tsv +0 -0
- data/data/wikipedia/dbpedia-sentences.tsv +1000 -0
- data/examples/dataflow.rb +28 -0
- data/examples/{server_logs/logline.rb → dataflow/apache_log_line.rb} +28 -18
- data/examples/dataflow/complex.rb +11 -0
- data/examples/dataflow/donuts.rb +13 -0
- data/examples/dataflow/parse_apache_logs.rb +16 -0
- data/examples/dataflow/pig_latinizer.rb +16 -0
- data/examples/dataflow/simple.rb +12 -0
- data/examples/dataflow/telegram.rb +45 -0
- data/examples/examples_helper.rb +9 -0
- data/examples/graph/minimum_spanning_tree.rb +73 -0
- data/examples/graph/union_find.rb +62 -0
- data/examples/text/latinize_text.rb +0 -0
- data/examples/text/pig_latin.rb +35 -0
- data/examples/tiny_count.rb +8 -0
- data/examples/tiny_count/jabberwocky_output.tsv +92 -0
- data/examples/twitter/locations.rb +29 -0
- data/examples/twitter/models.rb +24 -0
- data/examples/twitter/pt1-fiddle.pig +8 -0
- data/examples/twitter/pt2-simple_parse.pig +31 -0
- data/examples/twitter/pt2-simple_parse.rb +18 -0
- data/examples/twitter/pt3-join_on_zips.pig +39 -0
- data/examples/twitter/pt4-strong_links.rb +20 -0
- data/examples/twitter/pt5-lnglat_and_strong_links.pig +16 -0
- data/examples/twitter/states.tsv +50 -0
- data/examples/word_count.rb +36 -63
- data/examples/workflow/cherry_pie.md +104 -0
- data/examples/workflow/cherry_pie.rb +66 -0
- data/examples/workflow/fiddle.rb +24 -0
- data/examples/workflow/package_gem.rb +55 -0
- data/lib/{wukong/encoding.rb → away/escapement.rb} +0 -0
- data/lib/away/exe.rb +11 -0
- data/lib/away/experimental.rb +5 -0
- data/lib/away/from_file.rb +52 -0
- data/lib/away/job.rb +56 -0
- data/lib/away/job/rake_compat.rb +17 -0
- data/lib/away/registry.rb +79 -0
- data/lib/away/runner.rb +276 -0
- data/lib/away/runner/execute.rb +121 -0
- data/lib/away/script.rb +161 -0
- data/lib/away/script/hadoop_command.rb +240 -0
- data/lib/away/source/file_list_source.rb +15 -0
- data/lib/away/source/looper.rb +18 -0
- data/lib/away/task.rb +219 -0
- data/lib/hanuman.rb +9 -0
- data/lib/hanuman/action.rb +21 -0
- data/lib/hanuman/chain.rb +4 -0
- data/lib/hanuman/graph.rb +51 -0
- data/lib/hanuman/graphviz.rb +74 -0
- data/lib/hanuman/graphvizzer.rb +185 -0
- data/lib/hanuman/resource.rb +6 -0
- data/lib/hanuman/slot.rb +87 -0
- data/lib/hanuman/slottable.rb +220 -0
- data/lib/hanuman/stage.rb +51 -0
- data/lib/wukong.rb +31 -17
- data/lib/wukong/bad_record.rb +13 -16
- data/lib/wukong/dataflow.rb +103 -0
- data/lib/wukong/event.rb +44 -0
- data/lib/wukong/local_runner.rb +55 -0
- data/lib/wukong/mapred.rb +3 -0
- data/lib/wukong/model/faker.rb +136 -0
- data/lib/wukong/processor.rb +142 -0
- data/lib/wukong/settings.rb +0 -0
- data/lib/wukong/universe.rb +48 -0
- data/lib/wukong/version.rb +3 -0
- data/lib/wukong/widget/filter.rb +81 -0
- data/lib/wukong/widget/gibberish.rb +123 -0
- data/lib/wukong/widget/monitor.rb +26 -0
- data/lib/wukong/widget/reducer.rb +66 -0
- data/lib/wukong/widget/sink.rb +58 -0
- data/lib/wukong/widget/source.rb +120 -0
- data/lib/wukong/widget/stringifier.rb +50 -0
- data/lib/wukong/workflow.rb +22 -0
- data/lib/wukong/workflow/command.rb +42 -0
- data/old/config/emr-example.yaml +48 -0
- data/{examples → old/examples}/README.txt +0 -0
- data/{examples → old/examples}/contrib/jeans/README.markdown +0 -0
- data/{examples → old/examples}/contrib/jeans/data/normalized_sizes +0 -0
- data/{examples → old/examples}/contrib/jeans/data/orders.tsv +0 -0
- data/{examples → old/examples}/contrib/jeans/data/sizes +0 -0
- data/{examples → old/examples}/contrib/jeans/normalize.rb +0 -0
- data/{examples → old/examples}/contrib/jeans/sizes.rb +0 -0
- data/old/examples/corpus/bnc_word_freq.rb +44 -0
- data/{examples → old/examples}/corpus/bucket_counter.rb +0 -0
- data/{examples → old/examples}/corpus/dbpedia_abstract_to_sentences.rb +0 -0
- data/{examples → old/examples}/corpus/sentence_bigrams.rb +0 -0
- data/{examples → old/examples}/corpus/sentence_coocurrence.rb +0 -0
- data/old/examples/corpus/stopwords.rb +138 -0
- data/{examples → old/examples}/corpus/words_to_bigrams.rb +0 -0
- data/{examples → old/examples}/emr/README.textile +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/credentials.json +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/emr.yaml +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/emr_bootstrap.sh +0 -0
- data/{examples → old/examples}/emr/elastic_mapreduce_example.rb +0 -0
- data/{examples → old/examples}/network_graph/adjacency_list.rb +0 -0
- data/{examples → old/examples}/network_graph/breadth_first_search.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_2paths.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_multi_edge.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_symmetric_links.rb +0 -0
- data/{examples → old/examples}/pagerank/README.textile +0 -0
- data/{examples → old/examples}/pagerank/gen_initial_pagerank_graph.pig +0 -0
- data/{examples → old/examples}/pagerank/pagerank.rb +0 -0
- data/{examples → old/examples}/pagerank/pagerank_initialize.rb +0 -0
- data/{examples → old/examples}/pagerank/run_pagerank.sh +0 -0
- data/{examples → old/examples}/sample_records.rb +0 -0
- data/{examples → old/examples}/server_logs/apache_log_parser.rb +0 -4
- data/{examples → old/examples}/server_logs/breadcrumbs.rb +0 -0
- data/{examples → old/examples}/server_logs/nook.rb +0 -0
- data/{examples → old/examples}/server_logs/nook/faraday_dummy_adapter.rb +0 -0
- data/{examples → old/examples}/server_logs/user_agent.rb +0 -0
- data/{examples → old/examples}/simple_word_count.rb +0 -0
- data/{examples → old/examples}/size.rb +0 -0
- data/{examples → old/examples}/stats/avg_value_frequency.rb +0 -0
- data/{examples → old/examples}/stats/binning_percentile_estimator.rb +0 -0
- data/{examples → old/examples}/stats/data/avg_value_frequency.tsv +0 -0
- data/{examples → old/examples}/stats/rank_and_bin.rb +0 -0
- data/{examples → old/examples}/stupidly_simple_filter.rb +0 -0
- data/old/examples/word_count.rb +75 -0
- data/old/graph/graphviz_builder.rb +580 -0
- data/old/graph_easy/Attributes.pm +4181 -0
- data/old/graph_easy/Graphviz.pm +2232 -0
- data/old/wukong.rb +18 -0
- data/{lib → old}/wukong/and_pig.rb +0 -0
- data/old/wukong/bad_record.rb +18 -0
- data/{lib → old}/wukong/datatypes.rb +0 -0
- data/{lib → old}/wukong/datatypes/enum.rb +0 -0
- data/{lib → old}/wukong/datatypes/fake_types.rb +0 -0
- data/{lib → old}/wukong/decorator.rb +0 -0
- data/{lib → old}/wukong/encoding/asciize.rb +0 -0
- data/{lib → old}/wukong/extensions.rb +0 -0
- data/{lib → old}/wukong/extensions/array.rb +0 -0
- data/{lib → old}/wukong/extensions/blank.rb +0 -0
- data/{lib → old}/wukong/extensions/class.rb +0 -0
- data/{lib → old}/wukong/extensions/date_time.rb +0 -0
- data/{lib → old}/wukong/extensions/emittable.rb +0 -0
- data/{lib → old}/wukong/extensions/enumerable.rb +0 -0
- data/{lib → old}/wukong/extensions/hash.rb +0 -0
- data/{lib → old}/wukong/extensions/hash_keys.rb +0 -0
- data/{lib → old}/wukong/extensions/hash_like.rb +0 -0
- data/{lib → old}/wukong/extensions/hashlike_class.rb +0 -0
- data/{lib → old}/wukong/extensions/module.rb +0 -0
- data/{lib → old}/wukong/extensions/pathname.rb +0 -0
- data/{lib → old}/wukong/extensions/string.rb +0 -0
- data/{lib → old}/wukong/extensions/struct.rb +0 -0
- data/{lib → old}/wukong/extensions/symbol.rb +0 -0
- data/{lib → old}/wukong/filename_pattern.rb +0 -0
- data/old/wukong/helper.rb +7 -0
- data/old/wukong/helper/stopwords.rb +195 -0
- data/old/wukong/helper/tokenize.rb +35 -0
- data/{lib → old}/wukong/logger.rb +0 -0
- data/{lib → old}/wukong/periodic_monitor.rb +0 -0
- data/{lib → old}/wukong/schema.rb +0 -0
- data/{lib → old}/wukong/script.rb +0 -0
- data/{lib → old}/wukong/script/avro_command.rb +0 -0
- data/{lib → old}/wukong/script/cassandra_loader_script.rb +0 -0
- data/{lib → old}/wukong/script/emr_command.rb +0 -0
- data/{lib → old}/wukong/script/hadoop_command.rb +0 -0
- data/{lib → old}/wukong/script/local_command.rb +4 -1
- data/{lib → old}/wukong/store.rb +0 -0
- data/{lib → old}/wukong/store/base.rb +0 -0
- data/{lib → old}/wukong/store/cassandra.rb +0 -0
- data/{lib → old}/wukong/store/cassandra/streaming.rb +0 -0
- data/{lib → old}/wukong/store/cassandra/struct_loader.rb +0 -0
- data/{lib → old}/wukong/store/cassandra_model.rb +0 -0
- data/{lib → old}/wukong/store/chh_chunked_flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/chunked_flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/conditional_store.rb +0 -0
- data/{lib → old}/wukong/store/factory.rb +0 -0
- data/{lib → old}/wukong/store/flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/key_store.rb +0 -0
- data/{lib → old}/wukong/store/null_store.rb +0 -0
- data/{lib → old}/wukong/store/read_thru_store.rb +0 -0
- data/{lib → old}/wukong/store/tokyo_tdb_key_store.rb +0 -0
- data/{lib → old}/wukong/store/tyrant_rdb_key_store.rb +0 -0
- data/{lib → old}/wukong/store/tyrant_tdb_key_store.rb +0 -0
- data/{lib → old}/wukong/streamer.rb +8 -0
- data/{lib → old}/wukong/streamer/accumulating_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/base.rb +2 -1
- data/{lib → old}/wukong/streamer/counting_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/filter.rb +0 -0
- data/old/wukong/streamer/instance_streamer.rb +15 -0
- data/old/wukong/streamer/json_streamer.rb +21 -0
- data/{lib → old}/wukong/streamer/line_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/list_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/rank_and_bin_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/record_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/set_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/struct_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/summing_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/uniq_by_last_reducer.rb +0 -0
- data/{lib → old}/wukong/typed_struct.rb +0 -0
- data/spec/away/encoding_spec.rb +32 -0
- data/spec/away/exe_spec.rb +20 -0
- data/spec/away/flow_spec.rb +82 -0
- data/spec/away/graph_spec.rb +6 -0
- data/spec/away/job_spec.rb +15 -0
- data/spec/away/rake_compat_spec.rb +9 -0
- data/spec/away/script_spec.rb +81 -0
- data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
- data/spec/examples/dataflow/parsing_spec.rb +13 -0
- data/spec/examples/dataflow/simple_spec.rb +8 -0
- data/spec/examples/dataflow/telegram_spec.rb +43 -0
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +35 -0
- data/spec/examples/text/pig_latin_spec.rb +21 -0
- data/spec/examples/workflow/cherry_pie_spec.rb +6 -0
- data/spec/hanuman/graph_spec.rb +17 -0
- data/spec/hanuman/graphviz_spec.rb +29 -0
- data/spec/hanuman/slot_spec.rb +2 -0
- data/spec/hanuman/stage_spec.rb +12 -0
- data/spec/spec_helper.rb +24 -6
- data/spec/support/examples_helper.rb +10 -0
- data/spec/support/hanuman_test_helpers.rb +90 -0
- data/spec/support/streamer_test_helpers.rb +6 -0
- data/spec/support/wukong_test_helpers.rb +43 -0
- data/spec/support/wukong_widget_helpers.rb +66 -0
- data/spec/wukong/dataflow_spec.rb +65 -0
- data/spec/wukong/local_runner_spec.rb +31 -0
- data/spec/wukong/model/faker_spec.rb +131 -0
- data/spec/wukong/processor_spec.rb +109 -0
- data/spec/wukong/runner_spec.rb +12 -0
- data/spec/wukong/widget/filter_spec.rb +99 -0
- data/spec/wukong/widget/sink_spec.rb +19 -0
- data/spec/wukong/widget/source_spec.rb +41 -0
- data/spec/wukong/widget/stringifier_spec.rb +51 -0
- data/spec/wukong/workflow/command_spec.rb +5 -0
- data/wukong.gemspec +36 -277
- metadata +421 -165
- data/CHANGELOG.textile +0 -106
- data/INSTALL.textile +0 -89
- data/README.textile +0 -274
- data/TODO.textile +0 -11
- data/examples/ignore_me/counting.rb +0 -55
- data/examples/ignore_me/grouper.rb +0 -71
- data/old/cassandra_streaming/berlitz_for_cassandra.textile +0 -22
- data/old/cassandra_streaming/client_interface_notes.textile +0 -200
- data/old/cassandra_streaming/client_schema.textile +0 -318
- data/old/cassandra_streaming/tuning.textile +0 -73
- data/spec/wukong/encoding_spec.rb +0 -36
- data/spec/wukong/script_spec.rb +0 -80
data/lib/hanuman/slot.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
module Hanuman
|
2
|
+
|
3
|
+
#
|
4
|
+
# Provides the methods required in order to accept inbound links.
|
5
|
+
# Including class must provide the input attribute and the owner method.
|
6
|
+
#
|
7
|
+
# @see IsOwnInputSlot
|
8
|
+
# @see Slottable
|
9
|
+
module Inlinkable
|
10
|
+
extend Gorillib::Concern
|
11
|
+
|
12
|
+
def set_input(stage)
|
13
|
+
write_attribute(:input, stage)
|
14
|
+
self
|
15
|
+
end
|
16
|
+
|
17
|
+
# wire another slot into this one
|
18
|
+
# @param other [Hanuman::Outlinkable] the other stage or slot
|
19
|
+
# @return this object, for chaining
|
20
|
+
def <<(other)
|
21
|
+
from(other)
|
22
|
+
self
|
23
|
+
end
|
24
|
+
|
25
|
+
# wire another slot into this one
|
26
|
+
# @param other [Hanuman::Outlinkable] the other stage or slot
|
27
|
+
# @return this object, for chaining
|
28
|
+
def from(other)
|
29
|
+
owner.connect(other, self)
|
30
|
+
self
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
#
|
35
|
+
# Provides the methods required in order to make outbound links.
|
36
|
+
# Including class must provide the output attribute and the owner method.
|
37
|
+
#
|
38
|
+
# @see IsOwnOutputSlot
|
39
|
+
# @see Slottable
|
40
|
+
module Outlinkable
|
41
|
+
extend Gorillib::Concern
|
42
|
+
|
43
|
+
def set_output(stage)
|
44
|
+
write_attribute(:output, stage)
|
45
|
+
self
|
46
|
+
end
|
47
|
+
|
48
|
+
# wire this slot into another slot
|
49
|
+
# @param other [Hanuman::Slot] the other stage
|
50
|
+
# @return the other slot
|
51
|
+
def >(other)
|
52
|
+
_, other = owner.connect(self, other)
|
53
|
+
other
|
54
|
+
end
|
55
|
+
|
56
|
+
# wire this stage's output into another stage's input
|
57
|
+
# @param other [Hanuman::Stage] the other stage
|
58
|
+
# @return this stage, for chaining
|
59
|
+
def into(other)
|
60
|
+
owner.connect(self, other)
|
61
|
+
self
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Slot
|
66
|
+
include Gorillib::Builder
|
67
|
+
field :name, Symbol
|
68
|
+
field :stage, Hanuman::Stage
|
69
|
+
def owner
|
70
|
+
stage.owner
|
71
|
+
end
|
72
|
+
def to_key() name ; end
|
73
|
+
end
|
74
|
+
|
75
|
+
class InputSlot < Slot
|
76
|
+
include Hanuman::Inlinkable
|
77
|
+
magic :input, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph that feeds into this one'
|
78
|
+
def other() input ; end
|
79
|
+
end
|
80
|
+
|
81
|
+
class OutputSlot < Slot
|
82
|
+
include Hanuman::Outlinkable
|
83
|
+
magic :output, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph this one feeds into'
|
84
|
+
def other() output ; end # the stage/slot this one feeds into (was misspelled `ouput`, raising NameError)
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
module Hanuman
|
2
|
+
|
3
|
+
#
|
4
|
+
# For stages that can be linked to directly
|
5
|
+
# Including this means your stage has exactly one input (itself).
|
6
|
+
#
|
7
|
+
module IsOwnInputSlot
|
8
|
+
extend Gorillib::Concern
|
9
|
+
include Inlinkable
|
10
|
+
included do
|
11
|
+
magic :input, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph that feeds into this one'
|
12
|
+
end
|
13
|
+
def inputs
|
14
|
+
input? ? [input] : []
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# For stages that can be linked from directly
|
20
|
+
# Including this means your stage has exactly one output (itself).
|
21
|
+
#
|
22
|
+
module IsOwnOutputSlot
|
23
|
+
extend Gorillib::Concern
|
24
|
+
include Outlinkable
|
25
|
+
included do
|
26
|
+
magic :output, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph this one feeds into'
|
27
|
+
end
|
28
|
+
def outputs
|
29
|
+
output? ? [output] : []
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# For stages with named slots
|
35
|
+
#
|
36
|
+
# A named slot is a special kind of field: saying
|
37
|
+
#
|
38
|
+
# consumes :brain
|
39
|
+
#
|
40
|
+
# gives your class
|
41
|
+
#
|
42
|
+
# * A normal attribute `brain_slot`
|
43
|
+
# * methods `brain_slot`, `receive_brain_slot` to go with it
|
44
|
+
# * method `brain`, returning the item (if any) connected to the brain slot
|
45
|
+
# * method `brain=` (alias for `receive_brain`) that links the brain slot with the given item
|
46
|
+
#
|
47
|
+
# @note that at the moment you can't have an input and an output with the same name.
|
48
|
+
#
|
49
|
+
module Slottable
|
50
|
+
extend Gorillib::Concern
|
51
|
+
include Inlinkable
|
52
|
+
include Outlinkable
|
53
|
+
|
54
|
+
included do
|
55
|
+
collection :outslots, Hanuman::OutputSlot, :key_method => :name
|
56
|
+
end
|
57
|
+
|
58
|
+
def inputs
|
59
|
+
inslots.to_a.map{|slot| slot.input }.compact
|
60
|
+
end
|
61
|
+
|
62
|
+
def inslots
|
63
|
+
self.class.inslot_fields.map{|_, slot_field| read_attribute(slot_field.name) }
|
64
|
+
end
|
65
|
+
|
66
|
+
def handle_extra_attributes(attrs)
|
67
|
+
self.class.inslot_fields.each do |_, field|
|
68
|
+
field_name = field.basename
|
69
|
+
next unless attrs.has_key?(field_name)
|
70
|
+
self.public_send(:"receive_#{field_name}", attrs.delete(field_name))
|
71
|
+
end
|
72
|
+
super(attrs)
|
73
|
+
end
|
74
|
+
|
75
|
+
module ClassMethods
|
76
|
+
def consumes(name, options={})
|
77
|
+
field name, Hanuman::Stage, {:field_type => InputSlotField}.merge(options)
|
78
|
+
end
|
79
|
+
def produces(name, options={})
|
80
|
+
field name, Hanuman::Stage, {:field_type => OutputSlotField}.merge(options)
|
81
|
+
end
|
82
|
+
|
83
|
+
def define_slot_reader(field)
|
84
|
+
meth_name = field.basename
|
85
|
+
slot_name = field.name
|
86
|
+
type = field.type
|
87
|
+
define_meta_module_method(meth_name, true) do ||
|
88
|
+
begin
|
89
|
+
slot = read_attribute(slot_name) or return nil
|
90
|
+
slot.other
|
91
|
+
rescue StandardError => err ; err.polish("#{self.class}.#{meth_name}") rescue nil ; raise ; end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def define_inslot_receiver(field)
|
96
|
+
meth_name = field.basename
|
97
|
+
slot_name = field.name
|
98
|
+
type = field.type
|
99
|
+
define_meta_module_method("receive_#{meth_name}", true) do |stage|
|
100
|
+
begin
|
101
|
+
slot = read_attribute(slot_name) or return nil
|
102
|
+
slot.from(stage)
|
103
|
+
self
|
104
|
+
rescue StandardError => err ; err.polish("#{self.class} set slot #{meth_name} to #{stage}") rescue nil ; raise ; end
|
105
|
+
end
|
106
|
+
meta_module.module_eval do
|
107
|
+
alias_method "#{meth_name}=", "receive_#{meth_name}"
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def define_outslot_receiver(field)
|
112
|
+
meth_name = field.basename
|
113
|
+
slot_name = field.name
|
114
|
+
type = field.type
|
115
|
+
define_meta_module_method("receive_#{meth_name}", true) do |stage|
|
116
|
+
begin
|
117
|
+
slot = read_attribute(slot_name) or return nil
|
118
|
+
slot.into(stage)
|
119
|
+
self
|
120
|
+
rescue StandardError => err ; err.polish("#{self.class} set slot #{meth_name} to #{stage}") rescue nil ; raise ; end
|
121
|
+
end
|
122
|
+
meta_module.module_eval do
|
123
|
+
alias_method "#{meth_name}=", "receive_#{meth_name}"
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def inslot_fields
|
128
|
+
fields.select{|_, field| field.is_a?(InputSlotField) }
|
129
|
+
end
|
130
|
+
|
131
|
+
def inslot_field?(field_name)
|
132
|
+
fields[field_name].is_a?(InputSlotField)
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
class SlotField < Gorillib::Model::Field
|
137
|
+
self.visibilities = visibilities.merge(:reader => true, :writer => false, :tester => false)
|
138
|
+
field :basename, Symbol
|
139
|
+
field :stage_type, Whatever, :doc => 'type for stages this slot accepts'
|
140
|
+
class_attribute :slot_type
|
141
|
+
|
142
|
+
def initialize(model, basename, type, options={})
|
143
|
+
name = "#{basename}_slot"
|
144
|
+
options[:stage_type] = type
|
145
|
+
slot_type = self.slot_type
|
146
|
+
options[:basename] = basename
|
147
|
+
options[:default] = ->{ slot_type.new(:name => basename, :stage => self) }
|
148
|
+
super(model, name, slot_type, options)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
class InputSlotField < SlotField
|
153
|
+
self.slot_type = Hanuman::InputSlot
|
154
|
+
def inscribe_methods(model)
|
155
|
+
model.__send__(:define_slot_reader, self)
|
156
|
+
model.__send__(:define_inslot_receiver, self)
|
157
|
+
super
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
class OutputSlotField < SlotField
|
162
|
+
self.slot_type = Hanuman::OutputSlot
|
163
|
+
def inscribe_methods(model)
|
164
|
+
model.__send__(:define_slot_reader, self)
|
165
|
+
model.__send__(:define_outslot_receiver, self)
|
166
|
+
super
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
end # Slottable
|
171
|
+
|
172
|
+
module SplatInputs
|
173
|
+
extend Gorillib::Concern
|
174
|
+
include Slottable
|
175
|
+
|
176
|
+
included do
|
177
|
+
collection :splat_inslots, Hanuman::InputSlot, :key_method => :name
|
178
|
+
end
|
179
|
+
|
180
|
+
def set_input(stage)
|
181
|
+
slot = Hanuman::InputSlot.new(:name => stage.name, :stage => self, :input => stage)
|
182
|
+
self.splat_inslots << slot
|
183
|
+
slot
|
184
|
+
end
|
185
|
+
|
186
|
+
def has_input?(slot_name)
|
187
|
+
self.splat_inslots.keys.include?(slot_name)
|
188
|
+
end
|
189
|
+
|
190
|
+
def inslots
|
191
|
+
super + splat_inslots.to_a
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
module SplatOutputs
|
196
|
+
extend Gorillib::Concern
|
197
|
+
include Slottable
|
198
|
+
|
199
|
+
included do
|
200
|
+
collection :splat_outslots, Hanuman::OutputSlot, :key_method => :name
|
201
|
+
end
|
202
|
+
|
203
|
+
def set_output(stage)
|
204
|
+
slot = Hanuman::OutputSlot.new(
|
205
|
+
:name => stage.name, :stage => self, :output => stage)
|
206
|
+
self.outslots << slot
|
207
|
+
slot
|
208
|
+
end
|
209
|
+
|
210
|
+
def outputs
|
211
|
+
outslots.to_a.map{|slot| slot.output }
|
212
|
+
end
|
213
|
+
|
214
|
+
def into(*others)
|
215
|
+
others.each{|other| super(other)}
|
216
|
+
self
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Hanuman
|
2
|
+
class Stage
|
3
|
+
include Gorillib::Builder
|
4
|
+
alias_method :configure, :receive!
|
5
|
+
|
6
|
+
magic :name, Symbol, :doc => 'name for this stage; should be unique among other stages on its containing graph', :tester => true
|
7
|
+
field :owner, Whatever, :doc => 'the graph this stage sits in'
|
8
|
+
magic :doc, String, :doc => 'freeform description of this stage type'
|
9
|
+
|
10
|
+
# @return the stage, namespaced by the graph that owns it
|
11
|
+
def fullname
|
12
|
+
[owner.try(:fullname), name].compact.join('.')
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.handle
|
16
|
+
Gorillib::Inflector.underscore(Gorillib::Inflector.demodulize(self.name))
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# Methods
|
21
|
+
#
|
22
|
+
|
23
|
+
# Called after the graph is constructed, before the flow is run
|
24
|
+
def setup
|
25
|
+
end
|
26
|
+
|
27
|
+
# Called to signal the flow should stop. Close any open connections, flush
|
28
|
+
# buffers, stop supervised projects, etc.
|
29
|
+
def stop
|
30
|
+
end
|
31
|
+
|
32
|
+
def lookup(stage)
|
33
|
+
owner.lookup(stage)
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
# Graph connections
|
38
|
+
#
|
39
|
+
|
40
|
+
def notify(msg)
|
41
|
+
true
|
42
|
+
end
|
43
|
+
|
44
|
+
def report
|
45
|
+
self.attributes
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_key() name ; end
|
49
|
+
def key_method() :name ; end
|
50
|
+
end
|
51
|
+
end
|
data/lib/wukong.rb
CHANGED
@@ -1,18 +1,32 @@
|
|
1
|
-
require '
|
2
|
-
require 'configliere'; Settings.use :define
|
3
|
-
require 'wukong/datatypes'
|
4
|
-
require 'wukong/periodic_monitor'
|
5
|
-
require 'wukong/logger'
|
6
|
-
autoload :BadRecord, 'wukong/bad_record'
|
7
|
-
autoload :TypedStruct, 'wukong/typed_struct'
|
8
|
-
module Wukong
|
9
|
-
autoload :Script, 'wukong/script'
|
10
|
-
autoload :Streamer, 'wukong/streamer'
|
11
|
-
autoload :Store, 'wukong/store'
|
12
|
-
autoload :FilenamePattern, 'wukong/filename_pattern'
|
13
|
-
autoload :Decorator, 'wukong/decorator'
|
1
|
+
require 'multi_json'
|
14
2
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
3
|
+
require 'configliere'
|
4
|
+
require 'gorillib/logger/log'
|
5
|
+
require 'gorillib/some'
|
6
|
+
require 'gorillib/builder'
|
7
|
+
require 'gorillib/model/serialization'
|
8
|
+
|
9
|
+
require 'wukong/settings'
|
10
|
+
|
11
|
+
require 'hanuman'
|
12
|
+
|
13
|
+
#
|
14
|
+
# Dataflow specific
|
15
|
+
#
|
16
|
+
require 'wukong/universe'
|
17
|
+
require 'wukong/dataflow'
|
18
|
+
require 'wukong/event'
|
19
|
+
|
20
|
+
require 'wukong/processor' # processes records in series
|
21
|
+
require 'wukong/widget/filter' # passes through only records that meet `accept?`
|
22
|
+
require 'wukong/widget/source' # generates raw records from outside
|
23
|
+
require 'wukong/widget/sink' # dispatch raw records to outside
|
24
|
+
require 'wukong/widget/stringifier' # converts raw blobs into structured records and vice versa
|
25
|
+
require 'wukong/mapred' # the standard stream-sort-group-stream map/reduce flow
|
26
|
+
require 'wukong/local_runner'
|
27
|
+
require 'wukong/bad_record'
|
28
|
+
|
29
|
+
#
|
30
|
+
# Workflow Specific
|
31
|
+
#
|
32
|
+
require 'wukong/workflow/command'
|
data/lib/wukong/bad_record.rb
CHANGED
@@ -1,18 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
:
|
13
|
-
|
14
|
-
)
|
15
|
-
def initialize errors='', *record_fields
|
16
|
-
super errors, record_fields
|
1
|
+
class BadRecord
|
2
|
+
include Gorillib::Model
|
3
|
+
field :contents, Whatever, :doc => "The faulty contents; will be truncated at 1000 characters"
|
4
|
+
field :error, Exception, :doc => "Error (optional)"
|
5
|
+
|
6
|
+
def receive_contents(contents)
|
7
|
+
super contents.to_s[0..1000]
|
8
|
+
end
|
9
|
+
|
10
|
+
def make(contents, error=nil)
|
11
|
+
hsh = { :contents => contents }
|
12
|
+
hsh[:error] = error if error
|
13
|
+
receive(hsh)
|
17
14
|
end
|
18
15
|
end
|