wukong 1.5.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/CHANGELOG.textile +32 -0
  2. data/README.textile +58 -12
  3. data/TODO.textile +0 -8
  4. data/bin/hdp-bzip +12 -17
  5. data/bin/hdp-kill-task +1 -1
  6. data/bin/hdp-sort +7 -7
  7. data/bin/hdp-stream +7 -7
  8. data/bin/hdp-stream-flat +2 -3
  9. data/bin/setcat +11 -0
  10. data/bin/uniq-ord +59 -0
  11. data/examples/corpus/bucket_counter.rb +47 -0
  12. data/examples/corpus/dbpedia_abstract_to_sentences.rb +85 -0
  13. data/examples/corpus/sentence_coocurrence.rb +70 -0
  14. data/examples/emr/README.textile +110 -0
  15. data/examples/emr/dot_wukong_dir/emr_bootstrap.sh +1 -0
  16. data/examples/emr/elastic_mapreduce_example.rb +2 -2
  17. data/examples/ignore_me/counting.rb +56 -0
  18. data/examples/ignore_me/grouper.rb +71 -0
  19. data/examples/network_graph/adjacency_list.rb +2 -2
  20. data/examples/network_graph/breadth_first_search.rb +14 -21
  21. data/examples/network_graph/gen_multi_edge.rb +22 -13
  22. data/examples/pagerank/pagerank.rb +1 -1
  23. data/examples/pagerank/pagerank_initialize.rb +6 -10
  24. data/examples/sample_records.rb +6 -16
  25. data/examples/server_logs/apache_log_parser.rb +7 -22
  26. data/examples/server_logs/breadcrumbs.rb +39 -0
  27. data/examples/server_logs/logline.rb +27 -0
  28. data/examples/size.rb +3 -2
  29. data/examples/{binning_percentile_estimator.rb → stats/binning_percentile_estimator.rb} +9 -11
  30. data/examples/{rank_and_bin.rb → stats/rank_and_bin.rb} +2 -2
  31. data/examples/stupidly_simple_filter.rb +11 -14
  32. data/examples/word_count.rb +16 -36
  33. data/lib/wukong/and_pig.rb +2 -15
  34. data/lib/wukong/logger.rb +7 -28
  35. data/lib/wukong/periodic_monitor.rb +24 -9
  36. data/lib/wukong/script/emr_command.rb +1 -0
  37. data/lib/wukong/script/hadoop_command.rb +31 -29
  38. data/lib/wukong/script.rb +19 -14
  39. data/lib/wukong/store/cassandra_model.rb +2 -1
  40. data/lib/wukong/streamer/accumulating_reducer.rb +5 -9
  41. data/lib/wukong/streamer/base.rb +44 -3
  42. data/lib/wukong/streamer/counting_reducer.rb +12 -12
  43. data/lib/wukong/streamer/filter.rb +2 -2
  44. data/lib/wukong/streamer/list_reducer.rb +3 -3
  45. data/lib/wukong/streamer/reducer.rb +11 -0
  46. data/lib/wukong/streamer.rb +7 -3
  47. data/lib/wukong.rb +7 -3
  48. data/{examples → old}/cassandra_streaming/berlitz_for_cassandra.textile +0 -0
  49. data/{examples → old}/cassandra_streaming/client_interface_notes.textile +0 -0
  50. data/{examples → old}/cassandra_streaming/client_schema.textile +0 -0
  51. data/{examples → old}/cassandra_streaming/tuning.textile +0 -0
  52. data/wukong.gemspec +257 -285
  53. metadata +45 -62
  54. data/examples/cassandra_streaming/avromapper.rb +0 -85
  55. data/examples/cassandra_streaming/cassandra.avpr +0 -468
  56. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +0 -62
  57. data/examples/cassandra_streaming/catter.sh +0 -45
  58. data/examples/cassandra_streaming/client_schema.avpr +0 -211
  59. data/examples/cassandra_streaming/foofile.avr +0 -0
  60. data/examples/cassandra_streaming/pymap.sh +0 -1
  61. data/examples/cassandra_streaming/pyreduce.sh +0 -1
  62. data/examples/cassandra_streaming/smutation.avpr +0 -188
  63. data/examples/cassandra_streaming/streamer.sh +0 -51
  64. data/examples/cassandra_streaming/struct_loader.rb +0 -24
  65. data/examples/count_keys.rb +0 -56
  66. data/examples/count_keys_at_mapper.rb +0 -57
  67. data/examples/emr/README-elastic_map_reduce.textile +0 -26
  68. data/examples/keystore/cassandra_batch_test.rb +0 -41
  69. data/examples/keystore/conditional_outputter_example.rb +0 -70
  70. data/examples/store/chunked_store_example.rb +0 -18
  71. data/lib/wukong/dfs.rb +0 -81
  72. data/lib/wukong/keystore/cassandra_conditional_outputter.rb +0 -122
  73. data/lib/wukong/keystore/redis_db.rb +0 -24
  74. data/lib/wukong/keystore/tyrant_db.rb +0 -137
  75. data/lib/wukong/keystore/tyrant_notes.textile +0 -145
  76. data/lib/wukong/models/graph.rb +0 -25
  77. data/lib/wukong/monitor/chunked_store.rb +0 -23
  78. data/lib/wukong/monitor/periodic_logger.rb +0 -34
  79. data/lib/wukong/monitor/periodic_monitor.rb +0 -70
  80. data/lib/wukong/monitor.rb +0 -7
  81. data/lib/wukong/rdf.rb +0 -104
  82. data/lib/wukong/streamer/cassandra_streamer.rb +0 -61
  83. data/lib/wukong/streamer/count_keys.rb +0 -30
  84. data/lib/wukong/streamer/count_lines.rb +0 -26
  85. data/lib/wukong/streamer/em_streamer.rb +0 -7
  86. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +0 -22
  87. data/lib/wukong/wukong_class.rb +0 -21
@@ -8,13 +8,13 @@ module Wukong
8
8
 
9
9
  # start with an empty list
10
10
  def start! *args
11
- self.values = []
11
+ @values = []
12
12
  end
13
13
 
14
14
  # aggregate all records.
15
15
  # note that this accumulates the full *record* -- key, value, everything.
16
16
  def accumulate *record
17
- self.values << record
17
+ @values << record
18
18
  end
19
19
 
20
20
  # emit the key and all records, tab-separated
@@ -24,7 +24,7 @@ module Wukong
24
24
  # values)
25
25
  #
26
26
  def finalize
27
- yield [key, values.to_flat.join(";")].flatten
27
+ yield [key, @values.to_flat.join(";")].flatten
28
28
  end
29
29
  end
30
30
  end
@@ -0,0 +1,11 @@
1
+ module Wukong
2
+ module Streamer
3
+ class Reducer < Wukong::Streamer::ListReducer
4
+
5
+ def finalize &block
6
+ reduce @values, &block
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -5,14 +5,18 @@ module Wukong
5
5
  autoload :RecordStreamer, 'wukong/streamer/record_streamer'
6
6
  autoload :StructStreamer, 'wukong/streamer/struct_streamer'
7
7
  autoload :StructRecordizer, 'wukong/streamer/struct_streamer'
8
- # cassandra goodies
9
- autoload :CassandraStreamer, 'wukong/streamer/cassandra_streamer'
10
8
  #
11
9
  autoload :Filter, 'wukong/streamer/filter'
12
10
  #
11
+ autoload :Reducer, 'wukong/streamer/reducer'
13
12
  autoload :AccumulatingReducer, 'wukong/streamer/accumulating_reducer'
13
+ autoload :CountingReducer, 'wukong/streamer/counting_reducer'
14
14
  autoload :ListReducer, 'wukong/streamer/list_reducer'
15
+ autoload :RankAndBinReducer, 'wukong/streamer/rank_and_bin_reducer'
15
16
  autoload :UniqByLastReducer, 'wukong/streamer/uniq_by_last_reducer'
16
- autoload :CountingReducer, 'wukong/streamer/counting_reducer'
17
+
18
+ class Streamer < Base
19
+ end
20
+
17
21
  end
18
22
  end
data/lib/wukong.rb CHANGED
@@ -1,13 +1,17 @@
1
+ require 'configliere'; Settings.use :define
1
2
  require 'wukong/extensions'
2
3
  require 'wukong/datatypes'
4
+ require 'wukong/periodic_monitor'
3
5
  require 'wukong/logger'
4
- require 'wukong/bad_record'
6
+ autoload :BadRecord, 'wukong/bad_record'
5
7
  autoload :TypedStruct, 'wukong/typed_struct'
6
- require 'configliere'; Configliere.use :define
7
8
  module Wukong
8
- autoload :Dfs, 'wukong/dfs'
9
9
  autoload :Script, 'wukong/script'
10
10
  autoload :Streamer, 'wukong/streamer'
11
11
  autoload :Store, 'wukong/store'
12
12
  autoload :FilenamePattern, 'wukong/filename_pattern'
13
+
14
+ def self.run mapper, reducer=nil, options={}
15
+ Wukong::Script.new(mapper, reducer, options).run
16
+ end
13
17
  end