wukong 2.0.2 → 3.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (268) hide show
  1. data/.document +5 -0
  2. data/.gitignore +46 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +12 -0
  6. data/.yardopts +19 -0
  7. data/CHANGELOG.md +7 -0
  8. data/Gemfile +3 -0
  9. data/Guardfile +14 -0
  10. data/NOTES-travis.md +31 -0
  11. data/README.md +422 -0
  12. data/Rakefile +12 -0
  13. data/TODO.md +99 -0
  14. data/VERSION +1 -0
  15. data/bin/hdp-cp +0 -0
  16. data/bin/wu-flow +10 -0
  17. data/bin/wu-map +17 -0
  18. data/bin/wu-red +17 -0
  19. data/bin/wukong +17 -0
  20. data/data/CREDITS.md +355 -0
  21. data/data/graph/airfares.tsv +2174 -0
  22. data/data/text/gift_of_the_magi.txt +225 -0
  23. data/data/text/jabberwocky.txt +36 -0
  24. data/data/text/rectification_of_names.txt +33 -0
  25. data/{spec/data → data/twitter}/a_atsigns_b.tsv +0 -0
  26. data/{spec/data → data/twitter}/a_follows_b.tsv +0 -0
  27. data/{spec/data → data/twitter}/tweet.tsv +0 -0
  28. data/{spec/data → data/twitter}/twitter_user.tsv +0 -0
  29. data/data/wikipedia/dbpedia-sentences.tsv +1000 -0
  30. data/examples/dataflow.rb +28 -0
  31. data/examples/{server_logs/logline.rb → dataflow/apache_log_line.rb} +28 -18
  32. data/examples/dataflow/complex.rb +11 -0
  33. data/examples/dataflow/donuts.rb +13 -0
  34. data/examples/dataflow/parse_apache_logs.rb +16 -0
  35. data/examples/dataflow/pig_latinizer.rb +16 -0
  36. data/examples/dataflow/simple.rb +12 -0
  37. data/examples/dataflow/telegram.rb +45 -0
  38. data/examples/examples_helper.rb +9 -0
  39. data/examples/graph/minimum_spanning_tree.rb +73 -0
  40. data/examples/graph/union_find.rb +62 -0
  41. data/examples/text/latinize_text.rb +0 -0
  42. data/examples/text/pig_latin.rb +35 -0
  43. data/examples/tiny_count.rb +8 -0
  44. data/examples/tiny_count/jabberwocky_output.tsv +92 -0
  45. data/examples/twitter/locations.rb +29 -0
  46. data/examples/twitter/models.rb +24 -0
  47. data/examples/twitter/pt1-fiddle.pig +8 -0
  48. data/examples/twitter/pt2-simple_parse.pig +31 -0
  49. data/examples/twitter/pt2-simple_parse.rb +18 -0
  50. data/examples/twitter/pt3-join_on_zips.pig +39 -0
  51. data/examples/twitter/pt4-strong_links.rb +20 -0
  52. data/examples/twitter/pt5-lnglat_and_strong_links.pig +16 -0
  53. data/examples/twitter/states.tsv +50 -0
  54. data/examples/word_count.rb +36 -63
  55. data/examples/workflow/cherry_pie.md +104 -0
  56. data/examples/workflow/cherry_pie.rb +66 -0
  57. data/examples/workflow/fiddle.rb +24 -0
  58. data/examples/workflow/package_gem.rb +55 -0
  59. data/lib/{wukong/encoding.rb → away/escapement.rb} +0 -0
  60. data/lib/away/exe.rb +11 -0
  61. data/lib/away/experimental.rb +5 -0
  62. data/lib/away/from_file.rb +52 -0
  63. data/lib/away/job.rb +56 -0
  64. data/lib/away/job/rake_compat.rb +17 -0
  65. data/lib/away/registry.rb +79 -0
  66. data/lib/away/runner.rb +276 -0
  67. data/lib/away/runner/execute.rb +121 -0
  68. data/lib/away/script.rb +161 -0
  69. data/lib/away/script/hadoop_command.rb +240 -0
  70. data/lib/away/source/file_list_source.rb +15 -0
  71. data/lib/away/source/looper.rb +18 -0
  72. data/lib/away/task.rb +219 -0
  73. data/lib/hanuman.rb +9 -0
  74. data/lib/hanuman/action.rb +21 -0
  75. data/lib/hanuman/chain.rb +4 -0
  76. data/lib/hanuman/graph.rb +51 -0
  77. data/lib/hanuman/graphviz.rb +74 -0
  78. data/lib/hanuman/graphvizzer.rb +185 -0
  79. data/lib/hanuman/resource.rb +6 -0
  80. data/lib/hanuman/slot.rb +87 -0
  81. data/lib/hanuman/slottable.rb +220 -0
  82. data/lib/hanuman/stage.rb +51 -0
  83. data/lib/wukong.rb +31 -17
  84. data/lib/wukong/bad_record.rb +13 -16
  85. data/lib/wukong/dataflow.rb +103 -0
  86. data/lib/wukong/event.rb +44 -0
  87. data/lib/wukong/local_runner.rb +55 -0
  88. data/lib/wukong/mapred.rb +3 -0
  89. data/lib/wukong/model/faker.rb +136 -0
  90. data/lib/wukong/processor.rb +142 -0
  91. data/lib/wukong/settings.rb +0 -0
  92. data/lib/wukong/universe.rb +48 -0
  93. data/lib/wukong/version.rb +3 -0
  94. data/lib/wukong/widget/filter.rb +81 -0
  95. data/lib/wukong/widget/gibberish.rb +123 -0
  96. data/lib/wukong/widget/monitor.rb +26 -0
  97. data/lib/wukong/widget/reducer.rb +66 -0
  98. data/lib/wukong/widget/sink.rb +58 -0
  99. data/lib/wukong/widget/source.rb +120 -0
  100. data/lib/wukong/widget/stringifier.rb +50 -0
  101. data/lib/wukong/workflow.rb +22 -0
  102. data/lib/wukong/workflow/command.rb +42 -0
  103. data/old/config/emr-example.yaml +48 -0
  104. data/{examples → old/examples}/README.txt +0 -0
  105. data/{examples → old/examples}/contrib/jeans/README.markdown +0 -0
  106. data/{examples → old/examples}/contrib/jeans/data/normalized_sizes +0 -0
  107. data/{examples → old/examples}/contrib/jeans/data/orders.tsv +0 -0
  108. data/{examples → old/examples}/contrib/jeans/data/sizes +0 -0
  109. data/{examples → old/examples}/contrib/jeans/normalize.rb +0 -0
  110. data/{examples → old/examples}/contrib/jeans/sizes.rb +0 -0
  111. data/old/examples/corpus/bnc_word_freq.rb +44 -0
  112. data/{examples → old/examples}/corpus/bucket_counter.rb +0 -0
  113. data/{examples → old/examples}/corpus/dbpedia_abstract_to_sentences.rb +0 -0
  114. data/{examples → old/examples}/corpus/sentence_bigrams.rb +0 -0
  115. data/{examples → old/examples}/corpus/sentence_coocurrence.rb +0 -0
  116. data/old/examples/corpus/stopwords.rb +138 -0
  117. data/{examples → old/examples}/corpus/words_to_bigrams.rb +0 -0
  118. data/{examples → old/examples}/emr/README.textile +0 -0
  119. data/{examples → old/examples}/emr/dot_wukong_dir/credentials.json +0 -0
  120. data/{examples → old/examples}/emr/dot_wukong_dir/emr.yaml +0 -0
  121. data/{examples → old/examples}/emr/dot_wukong_dir/emr_bootstrap.sh +0 -0
  122. data/{examples → old/examples}/emr/elastic_mapreduce_example.rb +0 -0
  123. data/{examples → old/examples}/network_graph/adjacency_list.rb +0 -0
  124. data/{examples → old/examples}/network_graph/breadth_first_search.rb +0 -0
  125. data/{examples → old/examples}/network_graph/gen_2paths.rb +0 -0
  126. data/{examples → old/examples}/network_graph/gen_multi_edge.rb +0 -0
  127. data/{examples → old/examples}/network_graph/gen_symmetric_links.rb +0 -0
  128. data/{examples → old/examples}/pagerank/README.textile +0 -0
  129. data/{examples → old/examples}/pagerank/gen_initial_pagerank_graph.pig +0 -0
  130. data/{examples → old/examples}/pagerank/pagerank.rb +0 -0
  131. data/{examples → old/examples}/pagerank/pagerank_initialize.rb +0 -0
  132. data/{examples → old/examples}/pagerank/run_pagerank.sh +0 -0
  133. data/{examples → old/examples}/sample_records.rb +0 -0
  134. data/{examples → old/examples}/server_logs/apache_log_parser.rb +0 -4
  135. data/{examples → old/examples}/server_logs/breadcrumbs.rb +0 -0
  136. data/{examples → old/examples}/server_logs/nook.rb +0 -0
  137. data/{examples → old/examples}/server_logs/nook/faraday_dummy_adapter.rb +0 -0
  138. data/{examples → old/examples}/server_logs/user_agent.rb +0 -0
  139. data/{examples → old/examples}/simple_word_count.rb +0 -0
  140. data/{examples → old/examples}/size.rb +0 -0
  141. data/{examples → old/examples}/stats/avg_value_frequency.rb +0 -0
  142. data/{examples → old/examples}/stats/binning_percentile_estimator.rb +0 -0
  143. data/{examples → old/examples}/stats/data/avg_value_frequency.tsv +0 -0
  144. data/{examples → old/examples}/stats/rank_and_bin.rb +0 -0
  145. data/{examples → old/examples}/stupidly_simple_filter.rb +0 -0
  146. data/old/examples/word_count.rb +75 -0
  147. data/old/graph/graphviz_builder.rb +580 -0
  148. data/old/graph_easy/Attributes.pm +4181 -0
  149. data/old/graph_easy/Graphviz.pm +2232 -0
  150. data/old/wukong.rb +18 -0
  151. data/{lib → old}/wukong/and_pig.rb +0 -0
  152. data/old/wukong/bad_record.rb +18 -0
  153. data/{lib → old}/wukong/datatypes.rb +0 -0
  154. data/{lib → old}/wukong/datatypes/enum.rb +0 -0
  155. data/{lib → old}/wukong/datatypes/fake_types.rb +0 -0
  156. data/{lib → old}/wukong/decorator.rb +0 -0
  157. data/{lib → old}/wukong/encoding/asciize.rb +0 -0
  158. data/{lib → old}/wukong/extensions.rb +0 -0
  159. data/{lib → old}/wukong/extensions/array.rb +0 -0
  160. data/{lib → old}/wukong/extensions/blank.rb +0 -0
  161. data/{lib → old}/wukong/extensions/class.rb +0 -0
  162. data/{lib → old}/wukong/extensions/date_time.rb +0 -0
  163. data/{lib → old}/wukong/extensions/emittable.rb +0 -0
  164. data/{lib → old}/wukong/extensions/enumerable.rb +0 -0
  165. data/{lib → old}/wukong/extensions/hash.rb +0 -0
  166. data/{lib → old}/wukong/extensions/hash_keys.rb +0 -0
  167. data/{lib → old}/wukong/extensions/hash_like.rb +0 -0
  168. data/{lib → old}/wukong/extensions/hashlike_class.rb +0 -0
  169. data/{lib → old}/wukong/extensions/module.rb +0 -0
  170. data/{lib → old}/wukong/extensions/pathname.rb +0 -0
  171. data/{lib → old}/wukong/extensions/string.rb +0 -0
  172. data/{lib → old}/wukong/extensions/struct.rb +0 -0
  173. data/{lib → old}/wukong/extensions/symbol.rb +0 -0
  174. data/{lib → old}/wukong/filename_pattern.rb +0 -0
  175. data/old/wukong/helper.rb +7 -0
  176. data/old/wukong/helper/stopwords.rb +195 -0
  177. data/old/wukong/helper/tokenize.rb +35 -0
  178. data/{lib → old}/wukong/logger.rb +0 -0
  179. data/{lib → old}/wukong/periodic_monitor.rb +0 -0
  180. data/{lib → old}/wukong/schema.rb +0 -0
  181. data/{lib → old}/wukong/script.rb +0 -0
  182. data/{lib → old}/wukong/script/avro_command.rb +0 -0
  183. data/{lib → old}/wukong/script/cassandra_loader_script.rb +0 -0
  184. data/{lib → old}/wukong/script/emr_command.rb +0 -0
  185. data/{lib → old}/wukong/script/hadoop_command.rb +0 -0
  186. data/{lib → old}/wukong/script/local_command.rb +4 -1
  187. data/{lib → old}/wukong/store.rb +0 -0
  188. data/{lib → old}/wukong/store/base.rb +0 -0
  189. data/{lib → old}/wukong/store/cassandra.rb +0 -0
  190. data/{lib → old}/wukong/store/cassandra/streaming.rb +0 -0
  191. data/{lib → old}/wukong/store/cassandra/struct_loader.rb +0 -0
  192. data/{lib → old}/wukong/store/cassandra_model.rb +0 -0
  193. data/{lib → old}/wukong/store/chh_chunked_flat_file_store.rb +0 -0
  194. data/{lib → old}/wukong/store/chunked_flat_file_store.rb +0 -0
  195. data/{lib → old}/wukong/store/conditional_store.rb +0 -0
  196. data/{lib → old}/wukong/store/factory.rb +0 -0
  197. data/{lib → old}/wukong/store/flat_file_store.rb +0 -0
  198. data/{lib → old}/wukong/store/key_store.rb +0 -0
  199. data/{lib → old}/wukong/store/null_store.rb +0 -0
  200. data/{lib → old}/wukong/store/read_thru_store.rb +0 -0
  201. data/{lib → old}/wukong/store/tokyo_tdb_key_store.rb +0 -0
  202. data/{lib → old}/wukong/store/tyrant_rdb_key_store.rb +0 -0
  203. data/{lib → old}/wukong/store/tyrant_tdb_key_store.rb +0 -0
  204. data/{lib → old}/wukong/streamer.rb +8 -0
  205. data/{lib → old}/wukong/streamer/accumulating_reducer.rb +0 -0
  206. data/{lib → old}/wukong/streamer/base.rb +2 -1
  207. data/{lib → old}/wukong/streamer/counting_reducer.rb +0 -0
  208. data/{lib → old}/wukong/streamer/filter.rb +0 -0
  209. data/old/wukong/streamer/instance_streamer.rb +15 -0
  210. data/old/wukong/streamer/json_streamer.rb +21 -0
  211. data/{lib → old}/wukong/streamer/line_streamer.rb +0 -0
  212. data/{lib → old}/wukong/streamer/list_reducer.rb +0 -0
  213. data/{lib → old}/wukong/streamer/rank_and_bin_reducer.rb +0 -0
  214. data/{lib → old}/wukong/streamer/record_streamer.rb +0 -0
  215. data/{lib → old}/wukong/streamer/reducer.rb +0 -0
  216. data/{lib → old}/wukong/streamer/set_reducer.rb +0 -0
  217. data/{lib → old}/wukong/streamer/struct_streamer.rb +0 -0
  218. data/{lib → old}/wukong/streamer/summing_reducer.rb +0 -0
  219. data/{lib → old}/wukong/streamer/uniq_by_last_reducer.rb +0 -0
  220. data/{lib → old}/wukong/typed_struct.rb +0 -0
  221. data/spec/away/encoding_spec.rb +32 -0
  222. data/spec/away/exe_spec.rb +20 -0
  223. data/spec/away/flow_spec.rb +82 -0
  224. data/spec/away/graph_spec.rb +6 -0
  225. data/spec/away/job_spec.rb +15 -0
  226. data/spec/away/rake_compat_spec.rb +9 -0
  227. data/spec/away/script_spec.rb +81 -0
  228. data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
  229. data/spec/examples/dataflow/parsing_spec.rb +13 -0
  230. data/spec/examples/dataflow/simple_spec.rb +8 -0
  231. data/spec/examples/dataflow/telegram_spec.rb +43 -0
  232. data/spec/examples/graph/minimum_spanning_tree_spec.rb +35 -0
  233. data/spec/examples/text/pig_latin_spec.rb +21 -0
  234. data/spec/examples/workflow/cherry_pie_spec.rb +6 -0
  235. data/spec/hanuman/graph_spec.rb +17 -0
  236. data/spec/hanuman/graphviz_spec.rb +29 -0
  237. data/spec/hanuman/slot_spec.rb +2 -0
  238. data/spec/hanuman/stage_spec.rb +12 -0
  239. data/spec/spec_helper.rb +24 -6
  240. data/spec/support/examples_helper.rb +10 -0
  241. data/spec/support/hanuman_test_helpers.rb +90 -0
  242. data/spec/support/streamer_test_helpers.rb +6 -0
  243. data/spec/support/wukong_test_helpers.rb +43 -0
  244. data/spec/support/wukong_widget_helpers.rb +66 -0
  245. data/spec/wukong/dataflow_spec.rb +65 -0
  246. data/spec/wukong/local_runner_spec.rb +31 -0
  247. data/spec/wukong/model/faker_spec.rb +131 -0
  248. data/spec/wukong/processor_spec.rb +109 -0
  249. data/spec/wukong/runner_spec.rb +12 -0
  250. data/spec/wukong/widget/filter_spec.rb +99 -0
  251. data/spec/wukong/widget/sink_spec.rb +19 -0
  252. data/spec/wukong/widget/source_spec.rb +41 -0
  253. data/spec/wukong/widget/stringifier_spec.rb +51 -0
  254. data/spec/wukong/workflow/command_spec.rb +5 -0
  255. data/wukong.gemspec +36 -277
  256. metadata +421 -165
  257. data/CHANGELOG.textile +0 -106
  258. data/INSTALL.textile +0 -89
  259. data/README.textile +0 -274
  260. data/TODO.textile +0 -11
  261. data/examples/ignore_me/counting.rb +0 -55
  262. data/examples/ignore_me/grouper.rb +0 -71
  263. data/old/cassandra_streaming/berlitz_for_cassandra.textile +0 -22
  264. data/old/cassandra_streaming/client_interface_notes.textile +0 -200
  265. data/old/cassandra_streaming/client_schema.textile +0 -318
  266. data/old/cassandra_streaming/tuning.textile +0 -73
  267. data/spec/wukong/encoding_spec.rb +0 -36
  268. data/spec/wukong/script_spec.rb +0 -80
@@ -0,0 +1,6 @@
1
+ module Hanuman
2
+ class Resource < Stage
3
+ include Hanuman::IsOwnInputSlot
4
+ include Hanuman::IsOwnOutputSlot
5
+ end
6
+ end
@@ -0,0 +1,87 @@
1
+ module Hanuman
2
+
3
+ #
4
+ # Provides the methods required in order to accept inbound links.
5
+ # Including class must provide the input attribute and the owner method.
6
+ #
7
+ # @see IsOwnInputSlot
8
+ # @see Slottable
9
+ module Inlinkable
10
+ extend Gorillib::Concern
11
+
12
+ def set_input(stage)
13
+ write_attribute(:input, stage)
14
+ self
15
+ end
16
+
17
+ # wire another slot into this one
18
+ # @param other [Hanuman::Outlinkable] the other stage or slot
19
+ # @returns this object, for chaining
20
+ def <<(other)
21
+ from(other)
22
+ self
23
+ end
24
+
25
+ # wire another slot into this one
26
+ # @param other [Hanuman::Outlinkable] the other stage or slot
27
+ # @returns this object, for chaining
28
+ def from(other)
29
+ owner.connect(other, self)
30
+ self
31
+ end
32
+ end
33
+
34
+ #
35
+ # Provides the methods required in order to create outbound links.
36
+ # Including class must provide the output attribute and the owner method.
37
+ #
38
+ # @see IsOwnOutputSlot
39
+ # @see Slottable
40
+ module Outlinkable
41
+ extend Gorillib::Concern
42
+
43
+ def set_output(stage)
44
+ write_attribute(:output, stage)
45
+ self
46
+ end
47
+
48
+ # wire this slot into another slot
49
+ # @param other [Hanuman::Slot] the other stage
50
+ # @returns the other slot
51
+ def >(other)
52
+ _, other = owner.connect(self, other)
53
+ other
54
+ end
55
+
56
+ # wire this stage's output into another stage's input
57
+ # @param other [Hanuman::Stage] the other stage
58
+ # @returns this stage, for chaining
59
+ def into(other)
60
+ owner.connect(self, other)
61
+ self
62
+ end
63
+ end
64
+
65
+ class Slot
66
+ include Gorillib::Builder
67
+ field :name, Symbol
68
+ field :stage, Hanuman::Stage
69
+ def owner
70
+ stage.owner
71
+ end
72
+ def to_key() name ; end
73
+ end
74
+
75
+ class InputSlot < Slot
76
+ include Hanuman::Inlinkable
77
+ magic :input, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph that feeds into this one'
78
+ def other() input ; end
79
+ end
80
+
81
+ class OutputSlot < Slot
82
+ include Hanuman::Outlinkable
83
+ magic :output, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph this one feeds into'
84
+ def other() ouput ; end
85
+ end
86
+
87
+ end
@@ -0,0 +1,220 @@
1
+ module Hanuman
2
+
3
+ #
4
+ # For stages that can be linked to directly
5
+ # Including this means your stage has exactly one input (itself).
6
+ #
7
+ module IsOwnInputSlot
8
+ extend Gorillib::Concern
9
+ include Inlinkable
10
+ included do
11
+ magic :input, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph that feeds into this one'
12
+ end
13
+ def inputs
14
+ input? ? [input] : []
15
+ end
16
+ end
17
+
18
+ #
19
+ # For stages that can be linked to directly
20
+ # Including this means your stage has exactly one output (itself).
21
+ #
22
+ module IsOwnOutputSlot
23
+ extend Gorillib::Concern
24
+ include Outlinkable
25
+ included do
26
+ magic :output, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph this one feeds into'
27
+ end
28
+ def outputs
29
+ output? ? [output] : []
30
+ end
31
+ end
32
+
33
+ #
34
+ # For stages with named slots
35
+ #
36
+ # A named slot is a special kind of field: saying
37
+ #
38
+ # consumes :brain
39
+ #
40
+ # gives your class
41
+ #
42
+ # * A normal attribute `brain_slot`
43
+ # * methods `brain_slot`, `receive_brain_slot` to go with it
44
+ # * method `brain`, returning the item (if any) connected to the brain slot
45
+ # * method `brain=` (alias for `receive_brain`) that links the brain slot with the given item
46
+ #
47
+ # @note that at the moment you can't have an input and an output with the same name.
48
+ #
49
+ module Slottable
50
+ extend Gorillib::Concern
51
+ include Inlinkable
52
+ include Outlinkable
53
+
54
+ included do
55
+ collection :outslots, Hanuman::OutputSlot, :key_method => :name
56
+ end
57
+
58
+ def inputs
59
+ inslots.to_a.map{|slot| slot.input }.compact
60
+ end
61
+
62
+ def inslots
63
+ self.class.inslot_fields.map{|_, slot_field| read_attribute(slot_field.name) }
64
+ end
65
+
66
+ def handle_extra_attributes(attrs)
67
+ self.class.inslot_fields.each do |_, field|
68
+ field_name = field.basename
69
+ next unless attrs.has_key?(field_name)
70
+ self.public_send(:"receive_#{field_name}", attrs.delete(field_name))
71
+ end
72
+ super(attrs)
73
+ end
74
+
75
+ module ClassMethods
76
+ def consumes(name, options={})
77
+ field name, Hanuman::Stage, {:field_type => InputSlotField}.merge(options)
78
+ end
79
+ def produces(name, options={})
80
+ field name, Hanuman::Stage, {:field_type => OutputSlotField}.merge(options)
81
+ end
82
+
83
+ def define_slot_reader(field)
84
+ meth_name = field.basename
85
+ slot_name = field.name
86
+ type = field.type
87
+ define_meta_module_method(meth_name, true) do ||
88
+ begin
89
+ slot = read_attribute(slot_name) or return nil
90
+ slot.other
91
+ rescue StandardError => err ; err.polish("#{self.class}.#{meth_name}") rescue nil ; raise ; end
92
+ end
93
+ end
94
+
95
+ def define_inslot_receiver(field)
96
+ meth_name = field.basename
97
+ slot_name = field.name
98
+ type = field.type
99
+ define_meta_module_method("receive_#{meth_name}", true) do |stage|
100
+ begin
101
+ slot = read_attribute(slot_name) or return nil
102
+ slot.from(stage)
103
+ self
104
+ rescue StandardError => err ; err.polish("#{self.class} set slot #{meth_name} to #{stage}") rescue nil ; raise ; end
105
+ end
106
+ meta_module.module_eval do
107
+ alias_method "#{meth_name}=", "receive_#{meth_name}"
108
+ end
109
+ end
110
+
111
+ def define_outslot_receiver(field)
112
+ meth_name = field.basename
113
+ slot_name = field.name
114
+ type = field.type
115
+ define_meta_module_method("receive_#{meth_name}", true) do |stage|
116
+ begin
117
+ slot = read_attribute(slot_name) or return nil
118
+ slot.into(stage)
119
+ self
120
+ rescue StandardError => err ; err.polish("#{self.class} set slot #{meth_name} to #{stage}") rescue nil ; raise ; end
121
+ end
122
+ meta_module.module_eval do
123
+ alias_method "#{meth_name}=", "receive_#{meth_name}"
124
+ end
125
+ end
126
+
127
+ def inslot_fields
128
+ fields.select{|_, field| field.is_a?(InputSlotField) }
129
+ end
130
+
131
+ def inslot_field?(field_name)
132
+ fields[field_name].is_a?(InputSlotField)
133
+ end
134
+ end
135
+
136
+ class SlotField < Gorillib::Model::Field
137
+ self.visibilities = visibilities.merge(:reader => true, :writer => false, :tester => false)
138
+ field :basename, Symbol
139
+ field :stage_type, Whatever, :doc => 'type for stages this slot accepts'
140
+ class_attribute :slot_type
141
+
142
+ def initialize(model, basename, type, options={})
143
+ name = "#{basename}_slot"
144
+ options[:stage_type] = type
145
+ slot_type = self.slot_type
146
+ options[:basename] = basename
147
+ options[:default] = ->{ slot_type.new(:name => basename, :stage => self) }
148
+ super(model, name, slot_type, options)
149
+ end
150
+ end
151
+
152
+ class InputSlotField < SlotField
153
+ self.slot_type = Hanuman::InputSlot
154
+ def inscribe_methods(model)
155
+ model.__send__(:define_slot_reader, self)
156
+ model.__send__(:define_inslot_receiver, self)
157
+ super
158
+ end
159
+ end
160
+
161
+ class OutputSlotField < SlotField
162
+ self.slot_type = Hanuman::OutputSlot
163
+ def inscribe_methods(model)
164
+ model.__send__(:define_slot_reader, self)
165
+ model.__send__(:define_outslot_receiver, self)
166
+ super
167
+ end
168
+ end
169
+
170
+ end # Slottable
171
+
172
+ module SplatInputs
173
+ extend Gorillib::Concern
174
+ include Slottable
175
+
176
+ included do
177
+ collection :splat_inslots, Hanuman::InputSlot, :key_method => :name
178
+ end
179
+
180
+ def set_input(stage)
181
+ slot = Hanuman::InputSlot.new(:name => stage.name, :stage => self, :input => stage)
182
+ self.splat_inslots << slot
183
+ slot
184
+ end
185
+
186
+ def has_input?(slot_name)
187
+ self.splat_inslots.keys.include?(slot_name)
188
+ end
189
+
190
+ def inslots
191
+ super + splat_inslots.to_a
192
+ end
193
+ end
194
+
195
+ module SplatOutputs
196
+ extend Gorillib::Concern
197
+ include Slottable
198
+
199
+ included do
200
+ collection :splat_outslots, Hanuman::OutputSlot, :key_method => :name
201
+ end
202
+
203
+ def set_output(stage)
204
+ slot = Hanuman::OutputSlot.new(
205
+ :name => stage.name, :stage => self, :output => stage)
206
+ self.outslots << slot
207
+ slot
208
+ end
209
+
210
+ def outputs
211
+ outslots.to_a.map{|slot| slot.output }
212
+ end
213
+
214
+ def into(*others)
215
+ others.each{|other| super(other)}
216
+ self
217
+ end
218
+ end
219
+
220
+ end
@@ -0,0 +1,51 @@
1
+ module Hanuman
2
+ class Stage
3
+ include Gorillib::Builder
4
+ alias_method :configure, :receive!
5
+
6
+ magic :name, Symbol, :doc => 'name for this stage; should be unique among other stages on its containing graph', :tester => true
7
+ field :owner, Whatever, :doc => 'the graph this stage sits in'
8
+ magic :doc, String, :doc => 'freeform description of this stage type'
9
+
10
+ # @returns the stage's name, namespaced by the graph that owns it
11
+ def fullname
12
+ [owner.try(:fullname), name].compact.join('.')
13
+ end
14
+
15
+ def self.handle
16
+ Gorillib::Inflector.underscore(Gorillib::Inflector.demodulize(self.name))
17
+ end
18
+
19
+ #
20
+ # Methods
21
+ #
22
+
23
+ # Called after the graph is constructed, before the flow is run
24
+ def setup
25
+ end
26
+
27
+ # Called to signal the flow should stop. Close any open connections, flush
28
+ # buffers, stop supervised projects, etc.
29
+ def stop
30
+ end
31
+
32
+ def lookup(stage)
33
+ owner.lookup(stage)
34
+ end
35
+
36
+ #
37
+ # Graph connections
38
+ #
39
+
40
+ def notify(msg)
41
+ true
42
+ end
43
+
44
+ def report
45
+ self.attributes
46
+ end
47
+
48
+ def to_key() name ; end
49
+ def key_method() :name ; end
50
+ end
51
+ end
@@ -1,18 +1,32 @@
1
- require 'wukong/extensions'
2
- require 'configliere'; Settings.use :define
3
- require 'wukong/datatypes'
4
- require 'wukong/periodic_monitor'
5
- require 'wukong/logger'
6
- autoload :BadRecord, 'wukong/bad_record'
7
- autoload :TypedStruct, 'wukong/typed_struct'
8
- module Wukong
9
- autoload :Script, 'wukong/script'
10
- autoload :Streamer, 'wukong/streamer'
11
- autoload :Store, 'wukong/store'
12
- autoload :FilenamePattern, 'wukong/filename_pattern'
13
- autoload :Decorator, 'wukong/decorator'
1
+ require 'multi_json'
14
2
 
15
- def self.run mapper, reducer=nil, options={}
16
- Wukong::Script.new(mapper, reducer, options).run
17
- end
18
- end
3
+ require 'configliere'
4
+ require 'gorillib/logger/log'
5
+ require 'gorillib/some'
6
+ require 'gorillib/builder'
7
+ require 'gorillib/model/serialization'
8
+
9
+ require 'wukong/settings'
10
+
11
+ require 'hanuman'
12
+
13
+ #
14
+ # Dataflow specific
15
+ #
16
+ require 'wukong/universe'
17
+ require 'wukong/dataflow'
18
+ require 'wukong/event'
19
+
20
+ require 'wukong/processor' # processes records in series
21
+ require 'wukong/widget/filter' # passes through only records that meet `accept?`
22
+ require 'wukong/widget/source' # generates raw records from outside
23
+ require 'wukong/widget/sink' # dispatch raw records to outside
24
+ require 'wukong/widget/stringifier' # converts raw blobs into structured records and vice versa
25
+ require 'wukong/mapred' # the standard stream-sort-group-stream map/reduce flow
26
+ require 'wukong/local_runner'
27
+ require 'wukong/bad_record'
28
+
29
+ #
30
+ # Workflow Specific
31
+ #
32
+ require 'wukong/workflow/command'
@@ -1,18 +1,15 @@
1
- #
2
- # Easily serialize bad records in-band, for later analysis or to discard if
3
- # neglectable.
4
- #
5
- # You can instantiate this as
6
- # success = do_stuff_to record
7
- # if ! success
8
- # return BadRecord.new("do_stuff_to-failed", record)
9
- # end
10
- #
11
- class BadRecord < Struct.new(
12
- :errors,
13
- :record
14
- )
15
- def initialize errors='', *record_fields
16
- super errors, record_fields
1
+ class BadRecord
2
+ include Gorillib::Model
3
+ field :contents, Whatever, :doc => "The faulty contents; will be truncated at 1000 characters"
4
+ field :error, Exception, :doc => "Error (optional)"
5
+
6
+ def receive_contents(contents)
7
+ super contents.to_s[0..1000]
8
+ end
9
+
10
+ def make(contents, error=nil)
11
+ hsh = { :contents => contents }
12
+ hsh[:error] = error if error
13
+ receive(hsh)
17
14
  end
18
15
  end