wukong 2.0.2 → 3.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +46 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/.yardopts +19 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +3 -0
- data/Guardfile +14 -0
- data/NOTES-travis.md +31 -0
- data/README.md +422 -0
- data/Rakefile +12 -0
- data/TODO.md +99 -0
- data/VERSION +1 -0
- data/bin/hdp-cp +0 -0
- data/bin/wu-flow +10 -0
- data/bin/wu-map +17 -0
- data/bin/wu-red +17 -0
- data/bin/wukong +17 -0
- data/data/CREDITS.md +355 -0
- data/data/graph/airfares.tsv +2174 -0
- data/data/text/gift_of_the_magi.txt +225 -0
- data/data/text/jabberwocky.txt +36 -0
- data/data/text/rectification_of_names.txt +33 -0
- data/{spec/data → data/twitter}/a_atsigns_b.tsv +0 -0
- data/{spec/data → data/twitter}/a_follows_b.tsv +0 -0
- data/{spec/data → data/twitter}/tweet.tsv +0 -0
- data/{spec/data → data/twitter}/twitter_user.tsv +0 -0
- data/data/wikipedia/dbpedia-sentences.tsv +1000 -0
- data/examples/dataflow.rb +28 -0
- data/examples/{server_logs/logline.rb → dataflow/apache_log_line.rb} +28 -18
- data/examples/dataflow/complex.rb +11 -0
- data/examples/dataflow/donuts.rb +13 -0
- data/examples/dataflow/parse_apache_logs.rb +16 -0
- data/examples/dataflow/pig_latinizer.rb +16 -0
- data/examples/dataflow/simple.rb +12 -0
- data/examples/dataflow/telegram.rb +45 -0
- data/examples/examples_helper.rb +9 -0
- data/examples/graph/minimum_spanning_tree.rb +73 -0
- data/examples/graph/union_find.rb +62 -0
- data/examples/text/latinize_text.rb +0 -0
- data/examples/text/pig_latin.rb +35 -0
- data/examples/tiny_count.rb +8 -0
- data/examples/tiny_count/jabberwocky_output.tsv +92 -0
- data/examples/twitter/locations.rb +29 -0
- data/examples/twitter/models.rb +24 -0
- data/examples/twitter/pt1-fiddle.pig +8 -0
- data/examples/twitter/pt2-simple_parse.pig +31 -0
- data/examples/twitter/pt2-simple_parse.rb +18 -0
- data/examples/twitter/pt3-join_on_zips.pig +39 -0
- data/examples/twitter/pt4-strong_links.rb +20 -0
- data/examples/twitter/pt5-lnglat_and_strong_links.pig +16 -0
- data/examples/twitter/states.tsv +50 -0
- data/examples/word_count.rb +36 -63
- data/examples/workflow/cherry_pie.md +104 -0
- data/examples/workflow/cherry_pie.rb +66 -0
- data/examples/workflow/fiddle.rb +24 -0
- data/examples/workflow/package_gem.rb +55 -0
- data/lib/{wukong/encoding.rb → away/escapement.rb} +0 -0
- data/lib/away/exe.rb +11 -0
- data/lib/away/experimental.rb +5 -0
- data/lib/away/from_file.rb +52 -0
- data/lib/away/job.rb +56 -0
- data/lib/away/job/rake_compat.rb +17 -0
- data/lib/away/registry.rb +79 -0
- data/lib/away/runner.rb +276 -0
- data/lib/away/runner/execute.rb +121 -0
- data/lib/away/script.rb +161 -0
- data/lib/away/script/hadoop_command.rb +240 -0
- data/lib/away/source/file_list_source.rb +15 -0
- data/lib/away/source/looper.rb +18 -0
- data/lib/away/task.rb +219 -0
- data/lib/hanuman.rb +9 -0
- data/lib/hanuman/action.rb +21 -0
- data/lib/hanuman/chain.rb +4 -0
- data/lib/hanuman/graph.rb +51 -0
- data/lib/hanuman/graphviz.rb +74 -0
- data/lib/hanuman/graphvizzer.rb +185 -0
- data/lib/hanuman/resource.rb +6 -0
- data/lib/hanuman/slot.rb +87 -0
- data/lib/hanuman/slottable.rb +220 -0
- data/lib/hanuman/stage.rb +51 -0
- data/lib/wukong.rb +31 -17
- data/lib/wukong/bad_record.rb +13 -16
- data/lib/wukong/dataflow.rb +103 -0
- data/lib/wukong/event.rb +44 -0
- data/lib/wukong/local_runner.rb +55 -0
- data/lib/wukong/mapred.rb +3 -0
- data/lib/wukong/model/faker.rb +136 -0
- data/lib/wukong/processor.rb +142 -0
- data/lib/wukong/settings.rb +0 -0
- data/lib/wukong/universe.rb +48 -0
- data/lib/wukong/version.rb +3 -0
- data/lib/wukong/widget/filter.rb +81 -0
- data/lib/wukong/widget/gibberish.rb +123 -0
- data/lib/wukong/widget/monitor.rb +26 -0
- data/lib/wukong/widget/reducer.rb +66 -0
- data/lib/wukong/widget/sink.rb +58 -0
- data/lib/wukong/widget/source.rb +120 -0
- data/lib/wukong/widget/stringifier.rb +50 -0
- data/lib/wukong/workflow.rb +22 -0
- data/lib/wukong/workflow/command.rb +42 -0
- data/old/config/emr-example.yaml +48 -0
- data/{examples → old/examples}/README.txt +0 -0
- data/{examples → old/examples}/contrib/jeans/README.markdown +0 -0
- data/{examples → old/examples}/contrib/jeans/data/normalized_sizes +0 -0
- data/{examples → old/examples}/contrib/jeans/data/orders.tsv +0 -0
- data/{examples → old/examples}/contrib/jeans/data/sizes +0 -0
- data/{examples → old/examples}/contrib/jeans/normalize.rb +0 -0
- data/{examples → old/examples}/contrib/jeans/sizes.rb +0 -0
- data/old/examples/corpus/bnc_word_freq.rb +44 -0
- data/{examples → old/examples}/corpus/bucket_counter.rb +0 -0
- data/{examples → old/examples}/corpus/dbpedia_abstract_to_sentences.rb +0 -0
- data/{examples → old/examples}/corpus/sentence_bigrams.rb +0 -0
- data/{examples → old/examples}/corpus/sentence_coocurrence.rb +0 -0
- data/old/examples/corpus/stopwords.rb +138 -0
- data/{examples → old/examples}/corpus/words_to_bigrams.rb +0 -0
- data/{examples → old/examples}/emr/README.textile +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/credentials.json +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/emr.yaml +0 -0
- data/{examples → old/examples}/emr/dot_wukong_dir/emr_bootstrap.sh +0 -0
- data/{examples → old/examples}/emr/elastic_mapreduce_example.rb +0 -0
- data/{examples → old/examples}/network_graph/adjacency_list.rb +0 -0
- data/{examples → old/examples}/network_graph/breadth_first_search.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_2paths.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_multi_edge.rb +0 -0
- data/{examples → old/examples}/network_graph/gen_symmetric_links.rb +0 -0
- data/{examples → old/examples}/pagerank/README.textile +0 -0
- data/{examples → old/examples}/pagerank/gen_initial_pagerank_graph.pig +0 -0
- data/{examples → old/examples}/pagerank/pagerank.rb +0 -0
- data/{examples → old/examples}/pagerank/pagerank_initialize.rb +0 -0
- data/{examples → old/examples}/pagerank/run_pagerank.sh +0 -0
- data/{examples → old/examples}/sample_records.rb +0 -0
- data/{examples → old/examples}/server_logs/apache_log_parser.rb +0 -4
- data/{examples → old/examples}/server_logs/breadcrumbs.rb +0 -0
- data/{examples → old/examples}/server_logs/nook.rb +0 -0
- data/{examples → old/examples}/server_logs/nook/faraday_dummy_adapter.rb +0 -0
- data/{examples → old/examples}/server_logs/user_agent.rb +0 -0
- data/{examples → old/examples}/simple_word_count.rb +0 -0
- data/{examples → old/examples}/size.rb +0 -0
- data/{examples → old/examples}/stats/avg_value_frequency.rb +0 -0
- data/{examples → old/examples}/stats/binning_percentile_estimator.rb +0 -0
- data/{examples → old/examples}/stats/data/avg_value_frequency.tsv +0 -0
- data/{examples → old/examples}/stats/rank_and_bin.rb +0 -0
- data/{examples → old/examples}/stupidly_simple_filter.rb +0 -0
- data/old/examples/word_count.rb +75 -0
- data/old/graph/graphviz_builder.rb +580 -0
- data/old/graph_easy/Attributes.pm +4181 -0
- data/old/graph_easy/Graphviz.pm +2232 -0
- data/old/wukong.rb +18 -0
- data/{lib → old}/wukong/and_pig.rb +0 -0
- data/old/wukong/bad_record.rb +18 -0
- data/{lib → old}/wukong/datatypes.rb +0 -0
- data/{lib → old}/wukong/datatypes/enum.rb +0 -0
- data/{lib → old}/wukong/datatypes/fake_types.rb +0 -0
- data/{lib → old}/wukong/decorator.rb +0 -0
- data/{lib → old}/wukong/encoding/asciize.rb +0 -0
- data/{lib → old}/wukong/extensions.rb +0 -0
- data/{lib → old}/wukong/extensions/array.rb +0 -0
- data/{lib → old}/wukong/extensions/blank.rb +0 -0
- data/{lib → old}/wukong/extensions/class.rb +0 -0
- data/{lib → old}/wukong/extensions/date_time.rb +0 -0
- data/{lib → old}/wukong/extensions/emittable.rb +0 -0
- data/{lib → old}/wukong/extensions/enumerable.rb +0 -0
- data/{lib → old}/wukong/extensions/hash.rb +0 -0
- data/{lib → old}/wukong/extensions/hash_keys.rb +0 -0
- data/{lib → old}/wukong/extensions/hash_like.rb +0 -0
- data/{lib → old}/wukong/extensions/hashlike_class.rb +0 -0
- data/{lib → old}/wukong/extensions/module.rb +0 -0
- data/{lib → old}/wukong/extensions/pathname.rb +0 -0
- data/{lib → old}/wukong/extensions/string.rb +0 -0
- data/{lib → old}/wukong/extensions/struct.rb +0 -0
- data/{lib → old}/wukong/extensions/symbol.rb +0 -0
- data/{lib → old}/wukong/filename_pattern.rb +0 -0
- data/old/wukong/helper.rb +7 -0
- data/old/wukong/helper/stopwords.rb +195 -0
- data/old/wukong/helper/tokenize.rb +35 -0
- data/{lib → old}/wukong/logger.rb +0 -0
- data/{lib → old}/wukong/periodic_monitor.rb +0 -0
- data/{lib → old}/wukong/schema.rb +0 -0
- data/{lib → old}/wukong/script.rb +0 -0
- data/{lib → old}/wukong/script/avro_command.rb +0 -0
- data/{lib → old}/wukong/script/cassandra_loader_script.rb +0 -0
- data/{lib → old}/wukong/script/emr_command.rb +0 -0
- data/{lib → old}/wukong/script/hadoop_command.rb +0 -0
- data/{lib → old}/wukong/script/local_command.rb +4 -1
- data/{lib → old}/wukong/store.rb +0 -0
- data/{lib → old}/wukong/store/base.rb +0 -0
- data/{lib → old}/wukong/store/cassandra.rb +0 -0
- data/{lib → old}/wukong/store/cassandra/streaming.rb +0 -0
- data/{lib → old}/wukong/store/cassandra/struct_loader.rb +0 -0
- data/{lib → old}/wukong/store/cassandra_model.rb +0 -0
- data/{lib → old}/wukong/store/chh_chunked_flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/chunked_flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/conditional_store.rb +0 -0
- data/{lib → old}/wukong/store/factory.rb +0 -0
- data/{lib → old}/wukong/store/flat_file_store.rb +0 -0
- data/{lib → old}/wukong/store/key_store.rb +0 -0
- data/{lib → old}/wukong/store/null_store.rb +0 -0
- data/{lib → old}/wukong/store/read_thru_store.rb +0 -0
- data/{lib → old}/wukong/store/tokyo_tdb_key_store.rb +0 -0
- data/{lib → old}/wukong/store/tyrant_rdb_key_store.rb +0 -0
- data/{lib → old}/wukong/store/tyrant_tdb_key_store.rb +0 -0
- data/{lib → old}/wukong/streamer.rb +8 -0
- data/{lib → old}/wukong/streamer/accumulating_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/base.rb +2 -1
- data/{lib → old}/wukong/streamer/counting_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/filter.rb +0 -0
- data/old/wukong/streamer/instance_streamer.rb +15 -0
- data/old/wukong/streamer/json_streamer.rb +21 -0
- data/{lib → old}/wukong/streamer/line_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/list_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/rank_and_bin_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/record_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/set_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/struct_streamer.rb +0 -0
- data/{lib → old}/wukong/streamer/summing_reducer.rb +0 -0
- data/{lib → old}/wukong/streamer/uniq_by_last_reducer.rb +0 -0
- data/{lib → old}/wukong/typed_struct.rb +0 -0
- data/spec/away/encoding_spec.rb +32 -0
- data/spec/away/exe_spec.rb +20 -0
- data/spec/away/flow_spec.rb +82 -0
- data/spec/away/graph_spec.rb +6 -0
- data/spec/away/job_spec.rb +15 -0
- data/spec/away/rake_compat_spec.rb +9 -0
- data/spec/away/script_spec.rb +81 -0
- data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
- data/spec/examples/dataflow/parsing_spec.rb +13 -0
- data/spec/examples/dataflow/simple_spec.rb +8 -0
- data/spec/examples/dataflow/telegram_spec.rb +43 -0
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +35 -0
- data/spec/examples/text/pig_latin_spec.rb +21 -0
- data/spec/examples/workflow/cherry_pie_spec.rb +6 -0
- data/spec/hanuman/graph_spec.rb +17 -0
- data/spec/hanuman/graphviz_spec.rb +29 -0
- data/spec/hanuman/slot_spec.rb +2 -0
- data/spec/hanuman/stage_spec.rb +12 -0
- data/spec/spec_helper.rb +24 -6
- data/spec/support/examples_helper.rb +10 -0
- data/spec/support/hanuman_test_helpers.rb +90 -0
- data/spec/support/streamer_test_helpers.rb +6 -0
- data/spec/support/wukong_test_helpers.rb +43 -0
- data/spec/support/wukong_widget_helpers.rb +66 -0
- data/spec/wukong/dataflow_spec.rb +65 -0
- data/spec/wukong/local_runner_spec.rb +31 -0
- data/spec/wukong/model/faker_spec.rb +131 -0
- data/spec/wukong/processor_spec.rb +109 -0
- data/spec/wukong/runner_spec.rb +12 -0
- data/spec/wukong/widget/filter_spec.rb +99 -0
- data/spec/wukong/widget/sink_spec.rb +19 -0
- data/spec/wukong/widget/source_spec.rb +41 -0
- data/spec/wukong/widget/stringifier_spec.rb +51 -0
- data/spec/wukong/workflow/command_spec.rb +5 -0
- data/wukong.gemspec +36 -277
- metadata +421 -165
- data/CHANGELOG.textile +0 -106
- data/INSTALL.textile +0 -89
- data/README.textile +0 -274
- data/TODO.textile +0 -11
- data/examples/ignore_me/counting.rb +0 -55
- data/examples/ignore_me/grouper.rb +0 -71
- data/old/cassandra_streaming/berlitz_for_cassandra.textile +0 -22
- data/old/cassandra_streaming/client_interface_notes.textile +0 -200
- data/old/cassandra_streaming/client_schema.textile +0 -318
- data/old/cassandra_streaming/tuning.textile +0 -73
- data/spec/wukong/encoding_spec.rb +0 -36
- data/spec/wukong/script_spec.rb +0 -80
@@ -0,0 +1,15 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Streamer
|
3
|
+
#
|
4
|
+
# Instantiate an instance of 'record_model' for each line
|
5
|
+
class InstanceStreamer < Wukong::Streamer::RecordStreamer
|
6
|
+
class_attribute :record_model
|
7
|
+
|
8
|
+
def recordize(raw_record)
|
9
|
+
fields = super(raw_record)
|
10
|
+
[ record_model.new(*fields) ] if fields
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Streamer
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Note: it's on you to `require 'json'` somewhere (there are too many JSON implementations for us to choose one for you).
|
6
|
+
class JsonStreamer < Wukong::Streamer::Base
|
7
|
+
|
8
|
+
#
|
9
|
+
# Parses the incoming record as JSON, returns a single arg to #process
|
10
|
+
#
|
11
|
+
def recordize line
|
12
|
+
begin
|
13
|
+
[JSON.parse(line)]
|
14
|
+
rescue StandardError => boom
|
15
|
+
bad_record!(boom, line.to_s)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# require 'spec_helper'
|
2
|
+
# require 'wukong/encoding'
|
3
|
+
#
|
4
|
+
# describe "Wukong encoding" do
|
5
|
+
#
|
6
|
+
# it 'en/decodes to xml by default' do
|
7
|
+
# Wukong.encode_str("&" ).should == '&amp;'
|
8
|
+
# Wukong.decode_str("&amp;" ).should == '&'
|
9
|
+
# end
|
10
|
+
# it 'en/decodes to xml with :xml' do
|
11
|
+
# Wukong.encode_str("&", :xml ).should == '&amp;'
|
12
|
+
# Wukong.decode_str("&amp;", :xml ).should == '&'
|
13
|
+
# end
|
14
|
+
# it 'url en/decodes with :url' do
|
15
|
+
# Wukong.encode_str("&", :url ).should == '%26'
|
16
|
+
# Wukong.decode_str("%26", :url ).should == '&'
|
17
|
+
# end
|
18
|
+
# { "'" => "&#39;", "\t" => "&#9;", "\n" => "&#10;", nil => '',}.each do |raw, enc|
|
19
|
+
# it 'encodes #{raw} to #{enc}' do
|
20
|
+
# Wukong.encode_str(raw, :xml ).should == enc
|
21
|
+
# end
|
22
|
+
# it 'decodes #{enc} to #{raw}' do
|
23
|
+
# Wukong.decode_str(enc, :xml ).should == raw.to_s
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# ["normal_string with %punctuation should `not be molested", ""].each do |str|
|
27
|
+
# it 'doesn\'t change #{str}' do
|
28
|
+
# Wukong.encode_str(str, :xml ).should == str
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# require 'spec_helper'
|
2
|
+
# require 'wukong'
|
3
|
+
#
|
4
|
+
# describe 'auto runner for scripts', :helpers => true do
|
5
|
+
#
|
6
|
+
# context 'at exit hook' do
|
7
|
+
#
|
8
|
+
# it 'defines a hook to run on exit'
|
9
|
+
#
|
10
|
+
# it 'does not run a script twice'
|
11
|
+
#
|
12
|
+
# it 'runs a script based on its invoked name (so that it works with symlinks)'
|
13
|
+
#
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
# describe 'wukong shell at exit hook' do
|
19
|
+
# it 'runs pry at correct point'
|
20
|
+
# end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'wukong'
|
3
|
+
|
4
|
+
# describe 'wukong', :helpers => true do
|
5
|
+
# subject{ described_class.new(:example) }
|
6
|
+
#
|
7
|
+
# describe Wukong::Flow do
|
8
|
+
# let(:test_sink){ test_array_sink }
|
9
|
+
# let(:example_flow) do
|
10
|
+
# test_sink = test_sink()
|
11
|
+
# Wukong.flow(:simple) do
|
12
|
+
# source(:iter, 1..100) | limit(7) | test_sink
|
13
|
+
# end
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# context '#add_stage' do
|
17
|
+
# it 'creates right subclass of Wukong::Source' do
|
18
|
+
# subject.add_stage(:source, :iter, []).should be_a(Wukong::Source::Iter)
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# it 'works with a simple example' do
|
23
|
+
# example_flow.run
|
24
|
+
# test_array_sink.records.should == (1..7).to_a
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# # context '#run' do
|
28
|
+
# # let(:test_sink){ mock }
|
29
|
+
# # it 'announces events and calls methods in right order' do
|
30
|
+
# # test_sink = test_sink()
|
31
|
+
# # test_sink.should_receive(:tell).with(:beg_stream).ordered
|
32
|
+
# # test_sink.should_receive(:call).exactly(7).times.ordered
|
33
|
+
# # test_sink.should_receive(:finally).once.ordered
|
34
|
+
# # test_sink.should_receive(:tell).with(:end_stream).ordered
|
35
|
+
# # example_flow.run
|
36
|
+
# # end
|
37
|
+
# # end
|
38
|
+
#
|
39
|
+
# context '#stdin' do
|
40
|
+
# its(:stdin){ should be_a(Wukong::Source::Iter) }
|
41
|
+
# end
|
42
|
+
# context '#stdout' do
|
43
|
+
# its(:stdout){ should be_a(Wukong::Sink::Stdout) }
|
44
|
+
# end
|
45
|
+
# context '#stderr' do
|
46
|
+
# its(:stderr){ should be_a(Wukong::Sink::Stderr) }
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# describe Wukong do
|
52
|
+
# context '.streamer' do
|
53
|
+
# subject{ Wukong.streamer('from_meth'){ def call(rec) rec.reverse ; end ; def bob() 1 ; end } }
|
54
|
+
# it 'raises an error if the handle is not a valid identifier' do
|
55
|
+
# ->{ Wukong.streamer('1love') }.should raise_error(ArgumentError, /no funny/)
|
56
|
+
# ->{ Wukong.streamer('this/that') }.should raise_error(ArgumentError, /no funny/)
|
57
|
+
# ->{ Wukong.streamer('This::That') }.should raise_error(ArgumentError, /no funny/)
|
58
|
+
# end
|
59
|
+
#
|
60
|
+
# it{ should < Wukong::Streamer }
|
61
|
+
# it{ should be_method_defined(:call) }
|
62
|
+
# it{ should be_method_defined(:bob) }
|
63
|
+
#
|
64
|
+
# it 'defines a constant in Wukong::Streamer' do
|
65
|
+
# subject.to_s.should == 'Wukong::Streamer::FromMeth'
|
66
|
+
# Wukong::Streamer.should be_const_defined(:FromMeth)
|
67
|
+
# end
|
68
|
+
# it 'raises if already defined' do
|
69
|
+
# subject
|
70
|
+
# ->{ Wukong.streamer('from_meth') }.should raise_error(ArgumentError, /already defined/i)
|
71
|
+
# end
|
72
|
+
#
|
73
|
+
# it 'works as expected' do
|
74
|
+
# subject.new.call("hi mom").should == "mom ih"
|
75
|
+
# subject.new.bob.should == 1
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# after{ Wukong::Streamer.send(:remove_const, :FromMeth) if Wukong::Streamer.const_defined?(:FromMeth) }
|
79
|
+
# end
|
80
|
+
# end
|
81
|
+
#
|
82
|
+
# end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# require 'spec_helper'
|
2
|
+
# require 'wukong'
|
3
|
+
#
|
4
|
+
# describe Wukong::Job, :helpers => true do
|
5
|
+
#
|
6
|
+
# context '#output_dir' do
|
7
|
+
# it 'has filename helpers'
|
8
|
+
# end
|
9
|
+
#
|
10
|
+
# context '#dry_run' do
|
11
|
+
# it 'does nothing when dry run flag is set'
|
12
|
+
#
|
13
|
+
# it 'announces each foregone action using Log.info'
|
14
|
+
# end
|
15
|
+
# end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# require 'spec_helper'
|
2
|
+
# require 'wukong/runner/hadoop'
|
3
|
+
#
|
4
|
+
# describe "Wukong::Runner::Hadoop" do
|
5
|
+
# before do
|
6
|
+
# ARGV.replace []
|
7
|
+
# @script = Wukong::Script.new 'mapper', 'reducer'
|
8
|
+
# end
|
9
|
+
#
|
10
|
+
# describe 'initialize' do
|
11
|
+
# it 'sets :reduce_tasks to 0 if reducer_klass is nil and no reduce_command or explicit setting' do
|
12
|
+
# @script = Wukong::Script.new 'mapper', nil
|
13
|
+
# @script.options[:reduce_tasks].should == 0
|
14
|
+
# end
|
15
|
+
# it 'respects :reduce_tasks if set even if reducer_klass is nil' do
|
16
|
+
# @script = Wukong::Script.new 'mapper', nil, :reduce_tasks => 1
|
17
|
+
# @script.options[:reduce_tasks].should == 1
|
18
|
+
# end
|
19
|
+
# it "doesn't set :reduce_tasks if reduce_command is given" do
|
20
|
+
# @script = Wukong::Script.new 'mapper', nil, :reduce_command => 1
|
21
|
+
# @script.options[:reduce_tasks].should be_nil
|
22
|
+
# end
|
23
|
+
# it 'sets mapper_klass in initializer' do
|
24
|
+
# @script.mapper_klass.should == 'mapper'
|
25
|
+
# end
|
26
|
+
# it 'sets reducer_klass in initializer' do
|
27
|
+
# @script.reducer_klass.should == 'reducer'
|
28
|
+
# end
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# describe 'child processes' do
|
32
|
+
# it 'calls self if a mapper_klass is set' do
|
33
|
+
# @script.should_receive(:ruby_interpreter_path).and_return('/path/to/ruby')
|
34
|
+
# @script.should_receive(:this_script_filename).and_return('/path/to/this_script')
|
35
|
+
# @script.map_command.should == %Q{/path/to/ruby /path/to/this_script --map }
|
36
|
+
# end
|
37
|
+
# it 'calls default_mapper if nil mapper_klass and no map_command is set' do
|
38
|
+
# @script = Wukong::Script.new nil, 'reducer', :default_mapper => 'default_mapper'
|
39
|
+
# @script.map_command.should == 'default_mapper'
|
40
|
+
# end
|
41
|
+
# it 'calls map_command if nil mapper_klass and map_command is set' do
|
42
|
+
# @script = Wukong::Script.new nil, 'reducer', :map_command => 'map_command', :default_mapper => 'default_mapper'
|
43
|
+
# @script.map_command.should == 'map_command'
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# it 'calls self if a reducer_klass is set' do
|
47
|
+
# @script.should_receive(:ruby_interpreter_path).and_return('/path/to/ruby')
|
48
|
+
# @script.should_receive(:this_script_filename).and_return('/path/to/this_script')
|
49
|
+
# @script.reduce_command.should == %Q{/path/to/ruby /path/to/this_script --reduce }
|
50
|
+
# end
|
51
|
+
# it 'calls default_reducer if nil reducer_klass and no reduce_command is set' do
|
52
|
+
# @script = Wukong::Script.new 'mapper', nil, :default_reducer => 'default_reducer'
|
53
|
+
# @script.reduce_command.should == 'default_reducer'
|
54
|
+
# end
|
55
|
+
# it 'calls reduce_command if nil reducer_klass and reduce_command is set' do
|
56
|
+
# @script = Wukong::Script.new 'mapper', nil, :reduce_command => 'reduce_command', :default_reducer => 'default_reducer'
|
57
|
+
# @script.reduce_command.should == 'reduce_command'
|
58
|
+
# end
|
59
|
+
#
|
60
|
+
# it 'runs script | sort | script when in local mode' do
|
61
|
+
# @script.should_receive(:run_mode).and_return('local')
|
62
|
+
# @script.should_receive(:map_command).and_return('map_command')
|
63
|
+
# @script.should_receive(:reduce_command).and_return('reduce_command')
|
64
|
+
# @script.runner_command("/path/in", "/path/out").should == %Q{ cat '/path/in' | map_command | sort | reduce_command > '/path/out'}
|
65
|
+
# end
|
66
|
+
#
|
67
|
+
# it 'calls out to hadoop when in non-local mode' do
|
68
|
+
# @script.should_receive(:run_mode).and_return('hadoop')
|
69
|
+
# @script.should_receive(:hadoop_command).and_return('hadoop_command whee!')
|
70
|
+
# @script.runner_command("/path/in", "/path/out").should == 'hadoop_command whee!'
|
71
|
+
# end
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# describe 'runner phase'
|
75
|
+
# it 'preserves non-internal-to-wukong params in non_wukong_params' do
|
76
|
+
# @script.options[:foo] = 'bar'
|
77
|
+
# @script.non_wukong_params.should == "--foo=bar"
|
78
|
+
# end
|
79
|
+
#
|
80
|
+
#
|
81
|
+
# end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'wukong'
|
3
|
+
|
4
|
+
describe_example_script(:parse_apache_logs, 'dataflow/parse_apache_logs.rb') do
|
5
|
+
it 'runs' do
|
6
|
+
out, err = Gorillib::TestHelpers.capture_output do
|
7
|
+
Wukong::LocalRunner.receive(:flow => subject) do
|
8
|
+
run :default
|
9
|
+
end
|
10
|
+
end
|
11
|
+
out.string.split("\n").first.should =~ /\{\"ip_address\":\"[\d\.]+\",.*\"}/
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'wukong'
|
3
|
+
|
4
|
+
describe_example_script(:telegram, 'dataflow/telegram.rb') do
|
5
|
+
it 'runs' do
|
6
|
+
Wukong::LocalRunner.run(subject, :default)
|
7
|
+
end
|
8
|
+
|
9
|
+
context 'Recompose processor' do
|
10
|
+
subject{ Wukong::Widget::Recompose }
|
11
|
+
its(:field_names){ should include(:break_length) }
|
12
|
+
|
13
|
+
let(:words ){
|
14
|
+
# 0 5 1 5 2 5 3 5 4 5 5 5 6 5 7 5 8
|
15
|
+
%w[
|
16
|
+
If names be not correct, language is not in accordance with
|
17
|
+
the truth of things. If language be not in accordance with
|
18
|
+
the truth of things, affairs cannot be carried on to success. ] }
|
19
|
+
|
20
|
+
context '#process' do
|
21
|
+
it 'breaks lines correctly' do
|
22
|
+
(2..80).each do |len|
|
23
|
+
# run the data flow into an array sink
|
24
|
+
test_sink = Wukong::Sink::ArraySink.new
|
25
|
+
rc = subject.new(:break_length => len, :output => test_sink )
|
26
|
+
words.each{|word| rc.process(word) }
|
27
|
+
rc.stop
|
28
|
+
# start and end are correct
|
29
|
+
test_sink.records.first.should =~ /^If/
|
30
|
+
test_sink.records.last.should =~ /success\.$/
|
31
|
+
# lines should be as long as possible, but not longer
|
32
|
+
test_sink.records[0..-2].zip(test_sink.records[1..-1]) do |line, nextl|
|
33
|
+
nextw = nextl.split[0]
|
34
|
+
((line.length <= len) || line !~ /\s/).should be_true
|
35
|
+
(line.length + nextw.length + 1 > len).should be_true
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'wukong'
|
3
|
+
require 'wukong/local_runner'
|
4
|
+
|
5
|
+
load Pathname.path_to(:examples, 'graph/minimum_spanning_tree.rb')
|
6
|
+
|
7
|
+
describe 'Minimum Spanning Tree', :examples_spec => true, :helpers => true do
|
8
|
+
|
9
|
+
context Wukong::Widget::DisjointForest do
|
10
|
+
subject{ Wukong::Widget::DisjointForest.new }
|
11
|
+
|
12
|
+
context 'operations' do
|
13
|
+
before do
|
14
|
+
%w[ AUS DFW ATL JFK SFO LGA LAX ].each{|el| subject.add el }
|
15
|
+
subject.union('DFW', 'AUS')
|
16
|
+
subject.union('ATL', 'JFK')
|
17
|
+
subject.union('ATL', 'DFW')
|
18
|
+
end
|
19
|
+
|
20
|
+
context '#find' do
|
21
|
+
it 'collapses elements into a shallow tree during a find' do
|
22
|
+
subject.parent['ATL'].should == 'JFK'
|
23
|
+
subject.parent['JFK'].should == 'AUS'
|
24
|
+
subject.find('ATL').should == 'AUS'
|
25
|
+
subject.parent['ATL'].should == 'AUS'
|
26
|
+
end
|
27
|
+
end
|
28
|
+
context '#union' do
|
29
|
+
it 'joins shallow tree to deep tree' do
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'wukong'
|
3
|
+
require 'wukong/local_runner'
|
4
|
+
|
5
|
+
load Pathname.path_to(:examples, 'text/pig_latin.rb')
|
6
|
+
|
7
|
+
describe 'Pig Latin', :examples_spec => true, :helpers => true do
|
8
|
+
|
9
|
+
context 'processor' do
|
10
|
+
subject{ Wukong::Widget::PigLatinize.new }
|
11
|
+
it 'breaks text into pig latin' do
|
12
|
+
subject.should_receive(:emit).with("Iway indfay ethay astramipay otay ebay ethay ostmay ensualsay ofway allway ethay altedsay uredcay eatsmay.")
|
13
|
+
subject.process("I find the pastrami to be the most sensual of all the salted cured meats.")
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'runs' do
|
18
|
+
Wukong::LocalRunner.run(ExampleUniverse.dataflow(:pig_latin), :default)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|