wukong 1.5.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/CHANGELOG.textile +32 -0
  2. data/README.textile +58 -12
  3. data/TODO.textile +0 -8
  4. data/bin/hdp-bzip +12 -17
  5. data/bin/hdp-kill-task +1 -1
  6. data/bin/hdp-sort +7 -7
  7. data/bin/hdp-stream +7 -7
  8. data/bin/hdp-stream-flat +2 -3
  9. data/bin/setcat +11 -0
  10. data/bin/uniq-ord +59 -0
  11. data/examples/corpus/bucket_counter.rb +47 -0
  12. data/examples/corpus/dbpedia_abstract_to_sentences.rb +85 -0
  13. data/examples/corpus/sentence_coocurrence.rb +70 -0
  14. data/examples/emr/README.textile +110 -0
  15. data/examples/emr/dot_wukong_dir/emr_bootstrap.sh +1 -0
  16. data/examples/emr/elastic_mapreduce_example.rb +2 -2
  17. data/examples/ignore_me/counting.rb +56 -0
  18. data/examples/ignore_me/grouper.rb +71 -0
  19. data/examples/network_graph/adjacency_list.rb +2 -2
  20. data/examples/network_graph/breadth_first_search.rb +14 -21
  21. data/examples/network_graph/gen_multi_edge.rb +22 -13
  22. data/examples/pagerank/pagerank.rb +1 -1
  23. data/examples/pagerank/pagerank_initialize.rb +6 -10
  24. data/examples/sample_records.rb +6 -16
  25. data/examples/server_logs/apache_log_parser.rb +7 -22
  26. data/examples/server_logs/breadcrumbs.rb +39 -0
  27. data/examples/server_logs/logline.rb +27 -0
  28. data/examples/size.rb +3 -2
  29. data/examples/{binning_percentile_estimator.rb → stats/binning_percentile_estimator.rb} +9 -11
  30. data/examples/{rank_and_bin.rb → stats/rank_and_bin.rb} +2 -2
  31. data/examples/stupidly_simple_filter.rb +11 -14
  32. data/examples/word_count.rb +16 -36
  33. data/lib/wukong/and_pig.rb +2 -15
  34. data/lib/wukong/logger.rb +7 -28
  35. data/lib/wukong/periodic_monitor.rb +24 -9
  36. data/lib/wukong/script/emr_command.rb +1 -0
  37. data/lib/wukong/script/hadoop_command.rb +31 -29
  38. data/lib/wukong/script.rb +19 -14
  39. data/lib/wukong/store/cassandra_model.rb +2 -1
  40. data/lib/wukong/streamer/accumulating_reducer.rb +5 -9
  41. data/lib/wukong/streamer/base.rb +44 -3
  42. data/lib/wukong/streamer/counting_reducer.rb +12 -12
  43. data/lib/wukong/streamer/filter.rb +2 -2
  44. data/lib/wukong/streamer/list_reducer.rb +3 -3
  45. data/lib/wukong/streamer/reducer.rb +11 -0
  46. data/lib/wukong/streamer.rb +7 -3
  47. data/lib/wukong.rb +7 -3
  48. data/{examples → old}/cassandra_streaming/berlitz_for_cassandra.textile +0 -0
  49. data/{examples → old}/cassandra_streaming/client_interface_notes.textile +0 -0
  50. data/{examples → old}/cassandra_streaming/client_schema.textile +0 -0
  51. data/{examples → old}/cassandra_streaming/tuning.textile +0 -0
  52. data/wukong.gemspec +257 -285
  53. metadata +45 -62
  54. data/examples/cassandra_streaming/avromapper.rb +0 -85
  55. data/examples/cassandra_streaming/cassandra.avpr +0 -468
  56. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +0 -62
  57. data/examples/cassandra_streaming/catter.sh +0 -45
  58. data/examples/cassandra_streaming/client_schema.avpr +0 -211
  59. data/examples/cassandra_streaming/foofile.avr +0 -0
  60. data/examples/cassandra_streaming/pymap.sh +0 -1
  61. data/examples/cassandra_streaming/pyreduce.sh +0 -1
  62. data/examples/cassandra_streaming/smutation.avpr +0 -188
  63. data/examples/cassandra_streaming/streamer.sh +0 -51
  64. data/examples/cassandra_streaming/struct_loader.rb +0 -24
  65. data/examples/count_keys.rb +0 -56
  66. data/examples/count_keys_at_mapper.rb +0 -57
  67. data/examples/emr/README-elastic_map_reduce.textile +0 -26
  68. data/examples/keystore/cassandra_batch_test.rb +0 -41
  69. data/examples/keystore/conditional_outputter_example.rb +0 -70
  70. data/examples/store/chunked_store_example.rb +0 -18
  71. data/lib/wukong/dfs.rb +0 -81
  72. data/lib/wukong/keystore/cassandra_conditional_outputter.rb +0 -122
  73. data/lib/wukong/keystore/redis_db.rb +0 -24
  74. data/lib/wukong/keystore/tyrant_db.rb +0 -137
  75. data/lib/wukong/keystore/tyrant_notes.textile +0 -145
  76. data/lib/wukong/models/graph.rb +0 -25
  77. data/lib/wukong/monitor/chunked_store.rb +0 -23
  78. data/lib/wukong/monitor/periodic_logger.rb +0 -34
  79. data/lib/wukong/monitor/periodic_monitor.rb +0 -70
  80. data/lib/wukong/monitor.rb +0 -7
  81. data/lib/wukong/rdf.rb +0 -104
  82. data/lib/wukong/streamer/cassandra_streamer.rb +0 -61
  83. data/lib/wukong/streamer/count_keys.rb +0 -30
  84. data/lib/wukong/streamer/count_lines.rb +0 -26
  85. data/lib/wukong/streamer/em_streamer.rb +0 -7
  86. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +0 -22
  87. data/lib/wukong/wukong_class.rb +0 -21
@@ -8,13 +8,13 @@ module Wukong
8
8
 
9
9
  # start with an empty list
10
10
  def start! *args
11
- self.values = []
11
+ @values = []
12
12
  end
13
13
 
14
14
  # aggregate all records.
15
15
  # note that this accumulates the full *record* -- key, value, everything.
16
16
  def accumulate *record
17
- self.values << record
17
+ @values << record
18
18
  end
19
19
 
20
20
  # emit the key and all records, tab-separated
@@ -24,7 +24,7 @@ module Wukong
24
24
  # values)
25
25
  #
26
26
  def finalize
27
- yield [key, values.to_flat.join(";")].flatten
27
+ yield [key, @values.to_flat.join(";")].flatten
28
28
  end
29
29
  end
30
30
  end
@@ -0,0 +1,11 @@
1
+ module Wukong
2
+ module Streamer
3
+ class Reducer < Wukong::Streamer::ListReducer
4
+
5
+ def finalize &block
6
+ reduce @values, &block
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -5,14 +5,18 @@ module Wukong
5
5
  autoload :RecordStreamer, 'wukong/streamer/record_streamer'
6
6
  autoload :StructStreamer, 'wukong/streamer/struct_streamer'
7
7
  autoload :StructRecordizer, 'wukong/streamer/struct_streamer'
8
- # cassandra goodies
9
- autoload :CassandraStreamer, 'wukong/streamer/cassandra_streamer'
10
8
  #
11
9
  autoload :Filter, 'wukong/streamer/filter'
12
10
  #
11
+ autoload :Reducer, 'wukong/streamer/reducer'
13
12
  autoload :AccumulatingReducer, 'wukong/streamer/accumulating_reducer'
13
+ autoload :CountingReducer, 'wukong/streamer/counting_reducer'
14
14
  autoload :ListReducer, 'wukong/streamer/list_reducer'
15
+ autoload :RankAndBinReducer, 'wukong/streamer/rank_and_bin_reducer'
15
16
  autoload :UniqByLastReducer, 'wukong/streamer/uniq_by_last_reducer'
16
- autoload :CountingReducer, 'wukong/streamer/counting_reducer'
17
+
18
+ class Streamer < Base
19
+ end
20
+
17
21
  end
18
22
  end
data/lib/wukong.rb CHANGED
@@ -1,13 +1,17 @@
1
+ require 'configliere'; Settings.use :define
1
2
  require 'wukong/extensions'
2
3
  require 'wukong/datatypes'
4
+ require 'wukong/periodic_monitor'
3
5
  require 'wukong/logger'
4
- require 'wukong/bad_record'
6
+ autoload :BadRecord, 'wukong/bad_record'
5
7
  autoload :TypedStruct, 'wukong/typed_struct'
6
- require 'configliere'; Configliere.use :define
7
8
  module Wukong
8
- autoload :Dfs, 'wukong/dfs'
9
9
  autoload :Script, 'wukong/script'
10
10
  autoload :Streamer, 'wukong/streamer'
11
11
  autoload :Store, 'wukong/store'
12
12
  autoload :FilenamePattern, 'wukong/filename_pattern'
13
+
14
+ def self.run mapper, reducer=nil, options={}
15
+ Wukong::Script.new(mapper, reducer, options).run
16
+ end
13
17
  end