wukong 1.5.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.textile +32 -0
- data/README.textile +58 -12
- data/TODO.textile +0 -8
- data/bin/hdp-bzip +12 -17
- data/bin/hdp-kill-task +1 -1
- data/bin/hdp-sort +7 -7
- data/bin/hdp-stream +7 -7
- data/bin/hdp-stream-flat +2 -3
- data/bin/setcat +11 -0
- data/bin/uniq-ord +59 -0
- data/examples/corpus/bucket_counter.rb +47 -0
- data/examples/corpus/dbpedia_abstract_to_sentences.rb +85 -0
- data/examples/corpus/sentence_coocurrence.rb +70 -0
- data/examples/emr/README.textile +110 -0
- data/examples/emr/dot_wukong_dir/emr_bootstrap.sh +1 -0
- data/examples/emr/elastic_mapreduce_example.rb +2 -2
- data/examples/ignore_me/counting.rb +56 -0
- data/examples/ignore_me/grouper.rb +71 -0
- data/examples/network_graph/adjacency_list.rb +2 -2
- data/examples/network_graph/breadth_first_search.rb +14 -21
- data/examples/network_graph/gen_multi_edge.rb +22 -13
- data/examples/pagerank/pagerank.rb +1 -1
- data/examples/pagerank/pagerank_initialize.rb +6 -10
- data/examples/sample_records.rb +6 -16
- data/examples/server_logs/apache_log_parser.rb +7 -22
- data/examples/server_logs/breadcrumbs.rb +39 -0
- data/examples/server_logs/logline.rb +27 -0
- data/examples/size.rb +3 -2
- data/examples/{binning_percentile_estimator.rb → stats/binning_percentile_estimator.rb} +9 -11
- data/examples/{rank_and_bin.rb → stats/rank_and_bin.rb} +2 -2
- data/examples/stupidly_simple_filter.rb +11 -14
- data/examples/word_count.rb +16 -36
- data/lib/wukong/and_pig.rb +2 -15
- data/lib/wukong/logger.rb +7 -28
- data/lib/wukong/periodic_monitor.rb +24 -9
- data/lib/wukong/script/emr_command.rb +1 -0
- data/lib/wukong/script/hadoop_command.rb +31 -29
- data/lib/wukong/script.rb +19 -14
- data/lib/wukong/store/cassandra_model.rb +2 -1
- data/lib/wukong/streamer/accumulating_reducer.rb +5 -9
- data/lib/wukong/streamer/base.rb +44 -3
- data/lib/wukong/streamer/counting_reducer.rb +12 -12
- data/lib/wukong/streamer/filter.rb +2 -2
- data/lib/wukong/streamer/list_reducer.rb +3 -3
- data/lib/wukong/streamer/reducer.rb +11 -0
- data/lib/wukong/streamer.rb +7 -3
- data/lib/wukong.rb +7 -3
- data/{examples → old}/cassandra_streaming/berlitz_for_cassandra.textile +0 -0
- data/{examples → old}/cassandra_streaming/client_interface_notes.textile +0 -0
- data/{examples → old}/cassandra_streaming/client_schema.textile +0 -0
- data/{examples → old}/cassandra_streaming/tuning.textile +0 -0
- data/wukong.gemspec +257 -285
- metadata +45 -62
- data/examples/cassandra_streaming/avromapper.rb +0 -85
- data/examples/cassandra_streaming/cassandra.avpr +0 -468
- data/examples/cassandra_streaming/cassandra_random_partitioner.rb +0 -62
- data/examples/cassandra_streaming/catter.sh +0 -45
- data/examples/cassandra_streaming/client_schema.avpr +0 -211
- data/examples/cassandra_streaming/foofile.avr +0 -0
- data/examples/cassandra_streaming/pymap.sh +0 -1
- data/examples/cassandra_streaming/pyreduce.sh +0 -1
- data/examples/cassandra_streaming/smutation.avpr +0 -188
- data/examples/cassandra_streaming/streamer.sh +0 -51
- data/examples/cassandra_streaming/struct_loader.rb +0 -24
- data/examples/count_keys.rb +0 -56
- data/examples/count_keys_at_mapper.rb +0 -57
- data/examples/emr/README-elastic_map_reduce.textile +0 -26
- data/examples/keystore/cassandra_batch_test.rb +0 -41
- data/examples/keystore/conditional_outputter_example.rb +0 -70
- data/examples/store/chunked_store_example.rb +0 -18
- data/lib/wukong/dfs.rb +0 -81
- data/lib/wukong/keystore/cassandra_conditional_outputter.rb +0 -122
- data/lib/wukong/keystore/redis_db.rb +0 -24
- data/lib/wukong/keystore/tyrant_db.rb +0 -137
- data/lib/wukong/keystore/tyrant_notes.textile +0 -145
- data/lib/wukong/models/graph.rb +0 -25
- data/lib/wukong/monitor/chunked_store.rb +0 -23
- data/lib/wukong/monitor/periodic_logger.rb +0 -34
- data/lib/wukong/monitor/periodic_monitor.rb +0 -70
- data/lib/wukong/monitor.rb +0 -7
- data/lib/wukong/rdf.rb +0 -104
- data/lib/wukong/streamer/cassandra_streamer.rb +0 -61
- data/lib/wukong/streamer/count_keys.rb +0 -30
- data/lib/wukong/streamer/count_lines.rb +0 -26
- data/lib/wukong/streamer/em_streamer.rb +0 -7
- data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +0 -22
- data/lib/wukong/wukong_class.rb +0 -21
@@ -8,13 +8,13 @@ module Wukong
|
|
8
8
|
|
9
9
|
# start with an empty list
|
10
10
|
def start! *args
|
11
|
-
|
11
|
+
@values = []
|
12
12
|
end
|
13
13
|
|
14
14
|
# aggregate all records.
|
15
15
|
# note that this accumulates the full *record* -- key, value, everything.
|
16
16
|
def accumulate *record
|
17
|
-
|
17
|
+
@values << record
|
18
18
|
end
|
19
19
|
|
20
20
|
# emit the key and all records, tab-separated
|
@@ -24,7 +24,7 @@ module Wukong
|
|
24
24
|
# values)
|
25
25
|
#
|
26
26
|
def finalize
|
27
|
-
yield [key, values.to_flat.join(";")].flatten
|
27
|
+
yield [key, @values.to_flat.join(";")].flatten
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
data/lib/wukong/streamer.rb
CHANGED
@@ -5,14 +5,18 @@ module Wukong
|
|
5
5
|
autoload :RecordStreamer, 'wukong/streamer/record_streamer'
|
6
6
|
autoload :StructStreamer, 'wukong/streamer/struct_streamer'
|
7
7
|
autoload :StructRecordizer, 'wukong/streamer/struct_streamer'
|
8
|
-
# cassandra goodies
|
9
|
-
autoload :CassandraStreamer, 'wukong/streamer/cassandra_streamer'
|
10
8
|
#
|
11
9
|
autoload :Filter, 'wukong/streamer/filter'
|
12
10
|
#
|
11
|
+
autoload :Reducer, 'wukong/streamer/reducer'
|
13
12
|
autoload :AccumulatingReducer, 'wukong/streamer/accumulating_reducer'
|
13
|
+
autoload :CountingReducer, 'wukong/streamer/counting_reducer'
|
14
14
|
autoload :ListReducer, 'wukong/streamer/list_reducer'
|
15
|
+
autoload :RankAndBinReducer, 'wukong/streamer/rank_and_bin_reducer'
|
15
16
|
autoload :UniqByLastReducer, 'wukong/streamer/uniq_by_last_reducer'
|
16
|
-
|
17
|
+
|
18
|
+
class Streamer < Base
|
19
|
+
end
|
20
|
+
|
17
21
|
end
|
18
22
|
end
|
data/lib/wukong.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
+
require 'configliere'; Settings.use :define
|
1
2
|
require 'wukong/extensions'
|
2
3
|
require 'wukong/datatypes'
|
4
|
+
require 'wukong/periodic_monitor'
|
3
5
|
require 'wukong/logger'
|
4
|
-
|
6
|
+
autoload :BadRecord, 'wukong/bad_record'
|
5
7
|
autoload :TypedStruct, 'wukong/typed_struct'
|
6
|
-
require 'configliere'; Configliere.use :define
|
7
8
|
module Wukong
|
8
|
-
autoload :Dfs, 'wukong/dfs'
|
9
9
|
autoload :Script, 'wukong/script'
|
10
10
|
autoload :Streamer, 'wukong/streamer'
|
11
11
|
autoload :Store, 'wukong/store'
|
12
12
|
autoload :FilenamePattern, 'wukong/filename_pattern'
|
13
|
+
|
14
|
+
def self.run mapper, reducer=nil, options={}
|
15
|
+
Wukong::Script.new(mapper, reducer, options).run
|
16
|
+
end
|
13
17
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|