wukong 2.0.2 → 3.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (268) hide show
  1. data/.document +5 -0
  2. data/.gitignore +46 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +12 -0
  6. data/.yardopts +19 -0
  7. data/CHANGELOG.md +7 -0
  8. data/Gemfile +3 -0
  9. data/Guardfile +14 -0
  10. data/NOTES-travis.md +31 -0
  11. data/README.md +422 -0
  12. data/Rakefile +12 -0
  13. data/TODO.md +99 -0
  14. data/VERSION +1 -0
  15. data/bin/hdp-cp +0 -0
  16. data/bin/wu-flow +10 -0
  17. data/bin/wu-map +17 -0
  18. data/bin/wu-red +17 -0
  19. data/bin/wukong +17 -0
  20. data/data/CREDITS.md +355 -0
  21. data/data/graph/airfares.tsv +2174 -0
  22. data/data/text/gift_of_the_magi.txt +225 -0
  23. data/data/text/jabberwocky.txt +36 -0
  24. data/data/text/rectification_of_names.txt +33 -0
  25. data/{spec/data → data/twitter}/a_atsigns_b.tsv +0 -0
  26. data/{spec/data → data/twitter}/a_follows_b.tsv +0 -0
  27. data/{spec/data → data/twitter}/tweet.tsv +0 -0
  28. data/{spec/data → data/twitter}/twitter_user.tsv +0 -0
  29. data/data/wikipedia/dbpedia-sentences.tsv +1000 -0
  30. data/examples/dataflow.rb +28 -0
  31. data/examples/{server_logs/logline.rb → dataflow/apache_log_line.rb} +28 -18
  32. data/examples/dataflow/complex.rb +11 -0
  33. data/examples/dataflow/donuts.rb +13 -0
  34. data/examples/dataflow/parse_apache_logs.rb +16 -0
  35. data/examples/dataflow/pig_latinizer.rb +16 -0
  36. data/examples/dataflow/simple.rb +12 -0
  37. data/examples/dataflow/telegram.rb +45 -0
  38. data/examples/examples_helper.rb +9 -0
  39. data/examples/graph/minimum_spanning_tree.rb +73 -0
  40. data/examples/graph/union_find.rb +62 -0
  41. data/examples/text/latinize_text.rb +0 -0
  42. data/examples/text/pig_latin.rb +35 -0
  43. data/examples/tiny_count.rb +8 -0
  44. data/examples/tiny_count/jabberwocky_output.tsv +92 -0
  45. data/examples/twitter/locations.rb +29 -0
  46. data/examples/twitter/models.rb +24 -0
  47. data/examples/twitter/pt1-fiddle.pig +8 -0
  48. data/examples/twitter/pt2-simple_parse.pig +31 -0
  49. data/examples/twitter/pt2-simple_parse.rb +18 -0
  50. data/examples/twitter/pt3-join_on_zips.pig +39 -0
  51. data/examples/twitter/pt4-strong_links.rb +20 -0
  52. data/examples/twitter/pt5-lnglat_and_strong_links.pig +16 -0
  53. data/examples/twitter/states.tsv +50 -0
  54. data/examples/word_count.rb +36 -63
  55. data/examples/workflow/cherry_pie.md +104 -0
  56. data/examples/workflow/cherry_pie.rb +66 -0
  57. data/examples/workflow/fiddle.rb +24 -0
  58. data/examples/workflow/package_gem.rb +55 -0
  59. data/lib/{wukong/encoding.rb → away/escapement.rb} +0 -0
  60. data/lib/away/exe.rb +11 -0
  61. data/lib/away/experimental.rb +5 -0
  62. data/lib/away/from_file.rb +52 -0
  63. data/lib/away/job.rb +56 -0
  64. data/lib/away/job/rake_compat.rb +17 -0
  65. data/lib/away/registry.rb +79 -0
  66. data/lib/away/runner.rb +276 -0
  67. data/lib/away/runner/execute.rb +121 -0
  68. data/lib/away/script.rb +161 -0
  69. data/lib/away/script/hadoop_command.rb +240 -0
  70. data/lib/away/source/file_list_source.rb +15 -0
  71. data/lib/away/source/looper.rb +18 -0
  72. data/lib/away/task.rb +219 -0
  73. data/lib/hanuman.rb +9 -0
  74. data/lib/hanuman/action.rb +21 -0
  75. data/lib/hanuman/chain.rb +4 -0
  76. data/lib/hanuman/graph.rb +51 -0
  77. data/lib/hanuman/graphviz.rb +74 -0
  78. data/lib/hanuman/graphvizzer.rb +185 -0
  79. data/lib/hanuman/resource.rb +6 -0
  80. data/lib/hanuman/slot.rb +87 -0
  81. data/lib/hanuman/slottable.rb +220 -0
  82. data/lib/hanuman/stage.rb +51 -0
  83. data/lib/wukong.rb +31 -17
  84. data/lib/wukong/bad_record.rb +13 -16
  85. data/lib/wukong/dataflow.rb +103 -0
  86. data/lib/wukong/event.rb +44 -0
  87. data/lib/wukong/local_runner.rb +55 -0
  88. data/lib/wukong/mapred.rb +3 -0
  89. data/lib/wukong/model/faker.rb +136 -0
  90. data/lib/wukong/processor.rb +142 -0
  91. data/lib/wukong/settings.rb +0 -0
  92. data/lib/wukong/universe.rb +48 -0
  93. data/lib/wukong/version.rb +3 -0
  94. data/lib/wukong/widget/filter.rb +81 -0
  95. data/lib/wukong/widget/gibberish.rb +123 -0
  96. data/lib/wukong/widget/monitor.rb +26 -0
  97. data/lib/wukong/widget/reducer.rb +66 -0
  98. data/lib/wukong/widget/sink.rb +58 -0
  99. data/lib/wukong/widget/source.rb +120 -0
  100. data/lib/wukong/widget/stringifier.rb +50 -0
  101. data/lib/wukong/workflow.rb +22 -0
  102. data/lib/wukong/workflow/command.rb +42 -0
  103. data/old/config/emr-example.yaml +48 -0
  104. data/{examples → old/examples}/README.txt +0 -0
  105. data/{examples → old/examples}/contrib/jeans/README.markdown +0 -0
  106. data/{examples → old/examples}/contrib/jeans/data/normalized_sizes +0 -0
  107. data/{examples → old/examples}/contrib/jeans/data/orders.tsv +0 -0
  108. data/{examples → old/examples}/contrib/jeans/data/sizes +0 -0
  109. data/{examples → old/examples}/contrib/jeans/normalize.rb +0 -0
  110. data/{examples → old/examples}/contrib/jeans/sizes.rb +0 -0
  111. data/old/examples/corpus/bnc_word_freq.rb +44 -0
  112. data/{examples → old/examples}/corpus/bucket_counter.rb +0 -0
  113. data/{examples → old/examples}/corpus/dbpedia_abstract_to_sentences.rb +0 -0
  114. data/{examples → old/examples}/corpus/sentence_bigrams.rb +0 -0
  115. data/{examples → old/examples}/corpus/sentence_coocurrence.rb +0 -0
  116. data/old/examples/corpus/stopwords.rb +138 -0
  117. data/{examples → old/examples}/corpus/words_to_bigrams.rb +0 -0
  118. data/{examples → old/examples}/emr/README.textile +0 -0
  119. data/{examples → old/examples}/emr/dot_wukong_dir/credentials.json +0 -0
  120. data/{examples → old/examples}/emr/dot_wukong_dir/emr.yaml +0 -0
  121. data/{examples → old/examples}/emr/dot_wukong_dir/emr_bootstrap.sh +0 -0
  122. data/{examples → old/examples}/emr/elastic_mapreduce_example.rb +0 -0
  123. data/{examples → old/examples}/network_graph/adjacency_list.rb +0 -0
  124. data/{examples → old/examples}/network_graph/breadth_first_search.rb +0 -0
  125. data/{examples → old/examples}/network_graph/gen_2paths.rb +0 -0
  126. data/{examples → old/examples}/network_graph/gen_multi_edge.rb +0 -0
  127. data/{examples → old/examples}/network_graph/gen_symmetric_links.rb +0 -0
  128. data/{examples → old/examples}/pagerank/README.textile +0 -0
  129. data/{examples → old/examples}/pagerank/gen_initial_pagerank_graph.pig +0 -0
  130. data/{examples → old/examples}/pagerank/pagerank.rb +0 -0
  131. data/{examples → old/examples}/pagerank/pagerank_initialize.rb +0 -0
  132. data/{examples → old/examples}/pagerank/run_pagerank.sh +0 -0
  133. data/{examples → old/examples}/sample_records.rb +0 -0
  134. data/{examples → old/examples}/server_logs/apache_log_parser.rb +0 -4
  135. data/{examples → old/examples}/server_logs/breadcrumbs.rb +0 -0
  136. data/{examples → old/examples}/server_logs/nook.rb +0 -0
  137. data/{examples → old/examples}/server_logs/nook/faraday_dummy_adapter.rb +0 -0
  138. data/{examples → old/examples}/server_logs/user_agent.rb +0 -0
  139. data/{examples → old/examples}/simple_word_count.rb +0 -0
  140. data/{examples → old/examples}/size.rb +0 -0
  141. data/{examples → old/examples}/stats/avg_value_frequency.rb +0 -0
  142. data/{examples → old/examples}/stats/binning_percentile_estimator.rb +0 -0
  143. data/{examples → old/examples}/stats/data/avg_value_frequency.tsv +0 -0
  144. data/{examples → old/examples}/stats/rank_and_bin.rb +0 -0
  145. data/{examples → old/examples}/stupidly_simple_filter.rb +0 -0
  146. data/old/examples/word_count.rb +75 -0
  147. data/old/graph/graphviz_builder.rb +580 -0
  148. data/old/graph_easy/Attributes.pm +4181 -0
  149. data/old/graph_easy/Graphviz.pm +2232 -0
  150. data/old/wukong.rb +18 -0
  151. data/{lib → old}/wukong/and_pig.rb +0 -0
  152. data/old/wukong/bad_record.rb +18 -0
  153. data/{lib → old}/wukong/datatypes.rb +0 -0
  154. data/{lib → old}/wukong/datatypes/enum.rb +0 -0
  155. data/{lib → old}/wukong/datatypes/fake_types.rb +0 -0
  156. data/{lib → old}/wukong/decorator.rb +0 -0
  157. data/{lib → old}/wukong/encoding/asciize.rb +0 -0
  158. data/{lib → old}/wukong/extensions.rb +0 -0
  159. data/{lib → old}/wukong/extensions/array.rb +0 -0
  160. data/{lib → old}/wukong/extensions/blank.rb +0 -0
  161. data/{lib → old}/wukong/extensions/class.rb +0 -0
  162. data/{lib → old}/wukong/extensions/date_time.rb +0 -0
  163. data/{lib → old}/wukong/extensions/emittable.rb +0 -0
  164. data/{lib → old}/wukong/extensions/enumerable.rb +0 -0
  165. data/{lib → old}/wukong/extensions/hash.rb +0 -0
  166. data/{lib → old}/wukong/extensions/hash_keys.rb +0 -0
  167. data/{lib → old}/wukong/extensions/hash_like.rb +0 -0
  168. data/{lib → old}/wukong/extensions/hashlike_class.rb +0 -0
  169. data/{lib → old}/wukong/extensions/module.rb +0 -0
  170. data/{lib → old}/wukong/extensions/pathname.rb +0 -0
  171. data/{lib → old}/wukong/extensions/string.rb +0 -0
  172. data/{lib → old}/wukong/extensions/struct.rb +0 -0
  173. data/{lib → old}/wukong/extensions/symbol.rb +0 -0
  174. data/{lib → old}/wukong/filename_pattern.rb +0 -0
  175. data/old/wukong/helper.rb +7 -0
  176. data/old/wukong/helper/stopwords.rb +195 -0
  177. data/old/wukong/helper/tokenize.rb +35 -0
  178. data/{lib → old}/wukong/logger.rb +0 -0
  179. data/{lib → old}/wukong/periodic_monitor.rb +0 -0
  180. data/{lib → old}/wukong/schema.rb +0 -0
  181. data/{lib → old}/wukong/script.rb +0 -0
  182. data/{lib → old}/wukong/script/avro_command.rb +0 -0
  183. data/{lib → old}/wukong/script/cassandra_loader_script.rb +0 -0
  184. data/{lib → old}/wukong/script/emr_command.rb +0 -0
  185. data/{lib → old}/wukong/script/hadoop_command.rb +0 -0
  186. data/{lib → old}/wukong/script/local_command.rb +4 -1
  187. data/{lib → old}/wukong/store.rb +0 -0
  188. data/{lib → old}/wukong/store/base.rb +0 -0
  189. data/{lib → old}/wukong/store/cassandra.rb +0 -0
  190. data/{lib → old}/wukong/store/cassandra/streaming.rb +0 -0
  191. data/{lib → old}/wukong/store/cassandra/struct_loader.rb +0 -0
  192. data/{lib → old}/wukong/store/cassandra_model.rb +0 -0
  193. data/{lib → old}/wukong/store/chh_chunked_flat_file_store.rb +0 -0
  194. data/{lib → old}/wukong/store/chunked_flat_file_store.rb +0 -0
  195. data/{lib → old}/wukong/store/conditional_store.rb +0 -0
  196. data/{lib → old}/wukong/store/factory.rb +0 -0
  197. data/{lib → old}/wukong/store/flat_file_store.rb +0 -0
  198. data/{lib → old}/wukong/store/key_store.rb +0 -0
  199. data/{lib → old}/wukong/store/null_store.rb +0 -0
  200. data/{lib → old}/wukong/store/read_thru_store.rb +0 -0
  201. data/{lib → old}/wukong/store/tokyo_tdb_key_store.rb +0 -0
  202. data/{lib → old}/wukong/store/tyrant_rdb_key_store.rb +0 -0
  203. data/{lib → old}/wukong/store/tyrant_tdb_key_store.rb +0 -0
  204. data/{lib → old}/wukong/streamer.rb +8 -0
  205. data/{lib → old}/wukong/streamer/accumulating_reducer.rb +0 -0
  206. data/{lib → old}/wukong/streamer/base.rb +2 -1
  207. data/{lib → old}/wukong/streamer/counting_reducer.rb +0 -0
  208. data/{lib → old}/wukong/streamer/filter.rb +0 -0
  209. data/old/wukong/streamer/instance_streamer.rb +15 -0
  210. data/old/wukong/streamer/json_streamer.rb +21 -0
  211. data/{lib → old}/wukong/streamer/line_streamer.rb +0 -0
  212. data/{lib → old}/wukong/streamer/list_reducer.rb +0 -0
  213. data/{lib → old}/wukong/streamer/rank_and_bin_reducer.rb +0 -0
  214. data/{lib → old}/wukong/streamer/record_streamer.rb +0 -0
  215. data/{lib → old}/wukong/streamer/reducer.rb +0 -0
  216. data/{lib → old}/wukong/streamer/set_reducer.rb +0 -0
  217. data/{lib → old}/wukong/streamer/struct_streamer.rb +0 -0
  218. data/{lib → old}/wukong/streamer/summing_reducer.rb +0 -0
  219. data/{lib → old}/wukong/streamer/uniq_by_last_reducer.rb +0 -0
  220. data/{lib → old}/wukong/typed_struct.rb +0 -0
  221. data/spec/away/encoding_spec.rb +32 -0
  222. data/spec/away/exe_spec.rb +20 -0
  223. data/spec/away/flow_spec.rb +82 -0
  224. data/spec/away/graph_spec.rb +6 -0
  225. data/spec/away/job_spec.rb +15 -0
  226. data/spec/away/rake_compat_spec.rb +9 -0
  227. data/spec/away/script_spec.rb +81 -0
  228. data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
  229. data/spec/examples/dataflow/parsing_spec.rb +13 -0
  230. data/spec/examples/dataflow/simple_spec.rb +8 -0
  231. data/spec/examples/dataflow/telegram_spec.rb +43 -0
  232. data/spec/examples/graph/minimum_spanning_tree_spec.rb +35 -0
  233. data/spec/examples/text/pig_latin_spec.rb +21 -0
  234. data/spec/examples/workflow/cherry_pie_spec.rb +6 -0
  235. data/spec/hanuman/graph_spec.rb +17 -0
  236. data/spec/hanuman/graphviz_spec.rb +29 -0
  237. data/spec/hanuman/slot_spec.rb +2 -0
  238. data/spec/hanuman/stage_spec.rb +12 -0
  239. data/spec/spec_helper.rb +24 -6
  240. data/spec/support/examples_helper.rb +10 -0
  241. data/spec/support/hanuman_test_helpers.rb +90 -0
  242. data/spec/support/streamer_test_helpers.rb +6 -0
  243. data/spec/support/wukong_test_helpers.rb +43 -0
  244. data/spec/support/wukong_widget_helpers.rb +66 -0
  245. data/spec/wukong/dataflow_spec.rb +65 -0
  246. data/spec/wukong/local_runner_spec.rb +31 -0
  247. data/spec/wukong/model/faker_spec.rb +131 -0
  248. data/spec/wukong/processor_spec.rb +109 -0
  249. data/spec/wukong/runner_spec.rb +12 -0
  250. data/spec/wukong/widget/filter_spec.rb +99 -0
  251. data/spec/wukong/widget/sink_spec.rb +19 -0
  252. data/spec/wukong/widget/source_spec.rb +41 -0
  253. data/spec/wukong/widget/stringifier_spec.rb +51 -0
  254. data/spec/wukong/workflow/command_spec.rb +5 -0
  255. data/wukong.gemspec +36 -277
  256. metadata +421 -165
  257. data/CHANGELOG.textile +0 -106
  258. data/INSTALL.textile +0 -89
  259. data/README.textile +0 -274
  260. data/TODO.textile +0 -11
  261. data/examples/ignore_me/counting.rb +0 -55
  262. data/examples/ignore_me/grouper.rb +0 -71
  263. data/old/cassandra_streaming/berlitz_for_cassandra.textile +0 -22
  264. data/old/cassandra_streaming/client_interface_notes.textile +0 -200
  265. data/old/cassandra_streaming/client_schema.textile +0 -318
  266. data/old/cassandra_streaming/tuning.textile +0 -73
  267. data/spec/wukong/encoding_spec.rb +0 -36
  268. data/spec/wukong/script_spec.rb +0 -80
@@ -0,0 +1,15 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ # Instantiate an instance of 'record_model' for each line
5
+ class InstanceStreamer < Wukong::Streamer::RecordStreamer
6
+ class_attribute :record_model
7
+
8
+ def recordize(raw_record)
9
+ fields = super(raw_record)
10
+ [ record_model.new(*fields) ] if fields
11
+ end
12
+
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,21 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ #
5
+ # Note: it's on you to `require 'json'` somewhere (there are too many JSON libraries to choose one for you).
6
+ class JsonStreamer < Wukong::Streamer::Base
7
+
8
+ #
9
+ # Parses the incoming record as JSON, returns a single arg to #process
10
+ #
11
+ def recordize line
12
+ begin
13
+ [JSON.parse(line)]
14
+ rescue StandardError => boom
15
+ bad_record!(boom, line.to_s)
16
+ end
17
+ end
18
+
19
+ end
20
+ end
21
+ end
File without changes
File without changes
@@ -0,0 +1,32 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong/encoding'
3
+ #
4
+ # describe "Wukong encoding" do
5
+ #
6
+ # it 'en/decodes to xml by default' do
7
+ # Wukong.encode_str("&" ).should == '&amp;'
8
+ # Wukong.decode_str("&amp;" ).should == '&'
9
+ # end
10
+ # it 'en/decodes to xml with :xml' do
11
+ # Wukong.encode_str("&", :xml ).should == '&amp;'
12
+ # Wukong.decode_str("&amp;", :xml ).should == '&'
13
+ # end
14
+ # it 'url en/decodes with :url' do
15
+ # Wukong.encode_str("&", :url ).should == '%26'
16
+ # Wukong.decode_str("%26", :url ).should == '&'
17
+ # end
18
+ # { "'" => "&apos;", "\t" => "&#9;", "\n" => "&#10;", nil => '',}.each do |raw, enc|
19
+ # it 'encodes #{raw} to #{enc}' do
20
+ # Wukong.encode_str(raw, :xml ).should == enc
21
+ # end
22
+ # it 'decodes #{enc} to #{raw}' do
23
+ # Wukong.decode_str(enc, :xml ).should == raw.to_s
24
+ # end
25
+ # end
26
+ # ["normal_string with %punctuation should `not be molested", ""].each do |str|
27
+ # it 'doesn\'t change #{str}' do
28
+ # Wukong.encode_str(str, :xml ).should == str
29
+ # end
30
+ # end
31
+ #
32
+ # end
@@ -0,0 +1,20 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe 'auto runner for scripts', :helpers => true do
5
+ #
6
+ # context 'at exit hook' do
7
+ #
8
+ # it 'defines a hook to run on exit'
9
+ #
10
+ # it 'does not run a script twice'
11
+ #
12
+ # it 'runs a script based on its invoked name (so that it works with symlinks)'
13
+ #
14
+ # end
15
+ #
16
+ # end
17
+ #
18
+ # describe 'wukong shell at exit hook' do
19
+ # it 'runs pry at correct point'
20
+ # end
@@ -0,0 +1,82 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+
4
+ # describe 'wukong', :helpers => true do
5
+ # subject{ described_class.new(:example) }
6
+ #
7
+ # describe Wukong::Flow do
8
+ # let(:test_sink){ test_array_sink }
9
+ # let(:example_flow) do
10
+ # test_sink = test_sink()
11
+ # Wukong.flow(:simple) do
12
+ # source(:iter, 1..100) | limit(7) | test_sink
13
+ # end
14
+ # end
15
+ #
16
+ # context '#add_stage' do
17
+ # it 'creates right subclass of Wukong::Source' do
18
+ # subject.add_stage(:source, :iter, []).should be_a(Wukong::Source::Iter)
19
+ # end
20
+ # end
21
+ #
22
+ # it 'works with a simple example' do
23
+ # example_flow.run
24
+ # test_array_sink.records.should == (1..7).to_a
25
+ # end
26
+ #
27
+ # # context '#run' do
28
+ # # let(:test_sink){ mock }
29
+ # # it 'announces events and calls methods in right order' do
30
+ # # test_sink = test_sink()
31
+ # # test_sink.should_receive(:tell).with(:beg_stream).ordered
32
+ # # test_sink.should_receive(:call).exactly(7).times.ordered
33
+ # # test_sink.should_receive(:finally).once.ordered
34
+ # # test_sink.should_receive(:tell).with(:end_stream).ordered
35
+ # # example_flow.run
36
+ # # end
37
+ # # end
38
+ #
39
+ # context '#stdin' do
40
+ # its(:stdin){ should be_a(Wukong::Source::Iter) }
41
+ # end
42
+ # context '#stdout' do
43
+ # its(:stdout){ should be_a(Wukong::Sink::Stdout) }
44
+ # end
45
+ # context '#stderr' do
46
+ # its(:stderr){ should be_a(Wukong::Sink::Stderr) }
47
+ # end
48
+ #
49
+ # end
50
+ #
51
+ # describe Wukong do
52
+ # context '.streamer' do
53
+ # subject{ Wukong.streamer('from_meth'){ def call(rec) rec.reverse ; end ; def bob() 1 ; end } }
54
+ # it 'raises an error if the handle is not a valid identifier' do
55
+ # ->{ Wukong.streamer('1love') }.should raise_error(ArgumentError, /no funny/)
56
+ # ->{ Wukong.streamer('this/that') }.should raise_error(ArgumentError, /no funny/)
57
+ # ->{ Wukong.streamer('This::That') }.should raise_error(ArgumentError, /no funny/)
58
+ # end
59
+ #
60
+ # it{ should < Wukong::Streamer }
61
+ # it{ should be_method_defined(:call) }
62
+ # it{ should be_method_defined(:bob) }
63
+ #
64
+ # it 'defines a constant in Wukong::Streamer' do
65
+ # subject.to_s.should == 'Wukong::Streamer::FromMeth'
66
+ # Wukong::Streamer.should be_const_defined(:FromMeth)
67
+ # end
68
+ # it 'raises if already defined' do
69
+ # subject
70
+ # ->{ Wukong.streamer('from_meth') }.should raise_error(ArgumentError, /already defined/i)
71
+ # end
72
+ #
73
+ # it 'works as expected' do
74
+ # subject.new.call("hi mom").should == "mom ih"
75
+ # subject.new.bob.should == 1
76
+ # end
77
+ #
78
+ # after{ Wukong::Streamer.send(:remove_const, :FromMeth) if Wukong::Streamer.const_defined?(:FromMeth) }
79
+ # end
80
+ # end
81
+ #
82
+ # end
@@ -0,0 +1,6 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe Wukong::Graph, :helpers => true do
5
+ #
6
+ # end
@@ -0,0 +1,15 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe Wukong::Job, :helpers => true do
5
+ #
6
+ # context '#output_dir' do
7
+ # it 'has filename helpers'
8
+ # end
9
+ #
10
+ # context '#dry_run' do
11
+ # it 'does nothing when dry run flag is set'
12
+ #
13
+ # it 'announces each foregone action using Log.info'
14
+ # end
15
+ # end
@@ -0,0 +1,9 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe 'rake compatibility', :helpers => true do
5
+ # it 'loads rake'
6
+ # it 'warns if the rake DSL is included at global level'
7
+ #
8
+ # it 'plays nice with Rails'
9
+ # end
@@ -0,0 +1,81 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong/runner/hadoop'
3
+ #
4
+ # describe "Wukong::Runner::Hadoop" do
5
+ # before do
6
+ # ARGV.replace []
7
+ # @script = Wukong::Script.new 'mapper', 'reducer'
8
+ # end
9
+ #
10
+ # describe 'initialize' do
11
+ # it 'sets :reduce_tasks to 0 if reducer_klass is nil and no reduce_command or explicit setting' do
12
+ # @script = Wukong::Script.new 'mapper', nil
13
+ # @script.options[:reduce_tasks].should == 0
14
+ # end
15
+ # it 'respects :reduce_tasks if set even if reducer_klass is nil' do
16
+ # @script = Wukong::Script.new 'mapper', nil, :reduce_tasks => 1
17
+ # @script.options[:reduce_tasks].should == 1
18
+ # end
19
+ # it "doesn't set :reduce_tasks if reduce_command is given" do
20
+ # @script = Wukong::Script.new 'mapper', nil, :reduce_command => 1
21
+ # @script.options[:reduce_tasks].should be_nil
22
+ # end
23
+ # it 'sets mapper_klass in initializer' do
24
+ # @script.mapper_klass.should == 'mapper'
25
+ # end
26
+ # it 'sets reducer_klass in initializer' do
27
+ # @script.reducer_klass.should == 'reducer'
28
+ # end
29
+ # end
30
+ #
31
+ # describe 'child processes' do
32
+ # it 'calls self if a mapper_klass is set' do
33
+ # @script.should_receive(:ruby_interpreter_path).and_return('/path/to/ruby')
34
+ # @script.should_receive(:this_script_filename).and_return('/path/to/this_script')
35
+ # @script.map_command.should == %Q{/path/to/ruby /path/to/this_script --map }
36
+ # end
37
+ # it 'calls default_mapper if nil mapper_klass and no map_command is set' do
38
+ # @script = Wukong::Script.new nil, 'reducer', :default_mapper => 'default_mapper'
39
+ # @script.map_command.should == 'default_mapper'
40
+ # end
41
+ # it 'calls map_command if nil mapper_klass and map_command is set' do
42
+ # @script = Wukong::Script.new nil, 'reducer', :map_command => 'map_command', :default_mapper => 'default_mapper'
43
+ # @script.map_command.should == 'map_command'
44
+ # end
45
+ #
46
+ # it 'calls self if a reducer_klass is set' do
47
+ # @script.should_receive(:ruby_interpreter_path).and_return('/path/to/ruby')
48
+ # @script.should_receive(:this_script_filename).and_return('/path/to/this_script')
49
+ # @script.reduce_command.should == %Q{/path/to/ruby /path/to/this_script --reduce }
50
+ # end
51
+ # it 'calls default_reducer if nil reducer_klass and no reduce_command is set' do
52
+ # @script = Wukong::Script.new 'mapper', nil, :default_reducer => 'default_reducer'
53
+ # @script.reduce_command.should == 'default_reducer'
54
+ # end
55
+ # it 'calls reduce_command if nil reducer_klass and reduce_command is set' do
56
+ # @script = Wukong::Script.new 'mapper', nil, :reduce_command => 'reduce_command', :default_reducer => 'default_reducer'
57
+ # @script.reduce_command.should == 'reduce_command'
58
+ # end
59
+ #
60
+ # it 'runs script | sort | script when in local mode' do
61
+ # @script.should_receive(:run_mode).and_return('local')
62
+ # @script.should_receive(:map_command).and_return('map_command')
63
+ # @script.should_receive(:reduce_command).and_return('reduce_command')
64
+ # @script.runner_command("/path/in", "/path/out").should == %Q{ cat '/path/in' | map_command | sort | reduce_command > '/path/out'}
65
+ # end
66
+ #
67
+ # it 'calls out to hadoop when in non-local mode' do
68
+ # @script.should_receive(:run_mode).and_return('hadoop')
69
+ # @script.should_receive(:hadoop_command).and_return('hadoop_command whee!')
70
+ # @script.runner_command("/path/in", "/path/out").should == 'hadoop_command whee!'
71
+ # end
72
+ # end
73
+ #
74
+ # describe 'runner phase'
75
+ # it 'preserves non-internal-to-wukong params in non_wukong_params' do
76
+ # @script.options[:foo] = 'bar'
77
+ # @script.non_wukong_params.should == "--foo=bar"
78
+ # end
79
+ #
80
+ #
81
+ # end
@@ -0,0 +1,8 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe_example_script(:parsing, 'dataflow/parsing.rb') do
5
+ # it 'runs' do
6
+ # Wukong::LocalRunner.run(subject, :default)
7
+ # end
8
+ # end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+
4
+ describe_example_script(:parse_apache_logs, 'dataflow/parse_apache_logs.rb') do
5
+ it 'runs' do
6
+ out, err = Gorillib::TestHelpers.capture_output do
7
+ Wukong::LocalRunner.receive(:flow => subject) do
8
+ run :default
9
+ end
10
+ end
11
+ out.string.split("\n").first.should =~ /\{\"ip_address\":\"[\d\.]+\",.*\"}/
12
+ end
13
+ end
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+
4
+ describe_example_script(:simple, 'dataflow/simple.rb') do
5
+ it 'runs' do
6
+ Wukong::LocalRunner.run(subject, :default)
7
+ end
8
+ end
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+
4
+ describe_example_script(:telegram, 'dataflow/telegram.rb') do
5
+ it 'runs' do
6
+ Wukong::LocalRunner.run(subject, :default)
7
+ end
8
+
9
+ context 'Recompose processor' do
10
+ subject{ Wukong::Widget::Recompose }
11
+ its(:field_names){ should include(:break_length) }
12
+
13
+ let(:words ){
14
+ # 0 5 1 5 2 5 3 5 4 5 5 5 6 5 7 5 8
15
+ %w[
16
+ If names be not correct, language is not in accordance with
17
+ the truth of things. If language be not in accordance with
18
+ the truth of things, affairs cannot be carried on to success. ] }
19
+
20
+ context '#process' do
21
+ it 'breaks lines correctly' do
22
+ (2..80).each do |len|
23
+ # run the data flow into an array sink
24
+ test_sink = Wukong::Sink::ArraySink.new
25
+ rc = subject.new(:break_length => len, :output => test_sink )
26
+ words.each{|word| rc.process(word) }
27
+ rc.stop
28
+ # start and end are correct
29
+ test_sink.records.first.should =~ /^If/
30
+ test_sink.records.last.should =~ /success\.$/
31
+ # lines should be as long as possible, but not longer
32
+ test_sink.records[0..-2].zip(test_sink.records[1..-1]) do |line, nextl|
33
+ nextw = nextl.split[0]
34
+ ((line.length <= len) || line !~ /\s/).should be_true
35
+ (line.length + nextw.length + 1 > len).should be_true
36
+ end
37
+ end
38
+ end
39
+
40
+ end
41
+ end
42
+
43
+ end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+ require 'wukong/local_runner'
4
+
5
+ load Pathname.path_to(:examples, 'graph/minimum_spanning_tree.rb')
6
+
7
+ describe 'Minimum Spanning Tree', :examples_spec => true, :helpers => true do
8
+
9
+ context Wukong::Widget::DisjointForest do
10
+ subject{ Wukong::Widget::DisjointForest.new }
11
+
12
+ context 'operations' do
13
+ before do
14
+ %w[ AUS DFW ATL JFK SFO LGA LAX ].each{|el| subject.add el }
15
+ subject.union('DFW', 'AUS')
16
+ subject.union('ATL', 'JFK')
17
+ subject.union('ATL', 'DFW')
18
+ end
19
+
20
+ context '#find' do
21
+ it 'collapses elements into a shallow tree during a find' do
22
+ subject.parent['ATL'].should == 'JFK'
23
+ subject.parent['JFK'].should == 'AUS'
24
+ subject.find('ATL').should == 'AUS'
25
+ subject.parent['ATL'].should == 'AUS'
26
+ end
27
+ end
28
+ context '#union' do
29
+ it 'joins shallow tree to deep tree' do
30
+ end
31
+ end
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+ require 'wukong'
3
+ require 'wukong/local_runner'
4
+
5
+ load Pathname.path_to(:examples, 'text/pig_latin.rb')
6
+
7
+ describe 'Pig Latin', :examples_spec => true, :helpers => true do
8
+
9
+ context 'processor' do
10
+ subject{ Wukong::Widget::PigLatinize.new }
11
+ it 'breaks text into pig latin' do
12
+ subject.should_receive(:emit).with("Iway indfay ethay astramipay otay ebay ethay ostmay ensualsay ofway allway ethay altedsay uredcay eatsmay.")
13
+ subject.process("I find the pastrami to be the most sensual of all the salted cured meats.")
14
+ end
15
+ end
16
+
17
+ it 'runs' do
18
+ Wukong::LocalRunner.run(ExampleUniverse.dataflow(:pig_latin), :default)
19
+ end
20
+
21
+ end