wukong 1.5.3 → 1.5.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/CHANGELOG.textile +4 -0
  2. data/bin/hdp-bin +44 -0
  3. data/bin/hdp-ls +2 -1
  4. data/docpages/avro/performance.textile +36 -0
  5. data/examples/cassandra_streaming/avromapper.rb +85 -0
  6. data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
  7. data/examples/cassandra_streaming/cassandra.avpr +468 -0
  8. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
  9. data/examples/cassandra_streaming/catter.sh +45 -0
  10. data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
  11. data/examples/cassandra_streaming/client_schema.avpr +211 -0
  12. data/examples/cassandra_streaming/client_schema.textile +318 -0
  13. data/examples/cassandra_streaming/foofile.avr +0 -0
  14. data/examples/cassandra_streaming/pymap.sh +1 -0
  15. data/examples/cassandra_streaming/pyreduce.sh +1 -0
  16. data/examples/cassandra_streaming/smutation.avpr +188 -0
  17. data/examples/cassandra_streaming/streamer.sh +51 -0
  18. data/examples/cassandra_streaming/struct_loader.rb +24 -0
  19. data/examples/cassandra_streaming/tuning.textile +73 -0
  20. data/examples/emr/README-elastic_map_reduce.textile +26 -0
  21. data/examples/emr/dot_wukong_dir/credentials.json +7 -0
  22. data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
  23. data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
  24. data/examples/emr/elastic_mapreduce_example.rb +1 -0
  25. data/lib/wukong/encoding/asciize.rb +108 -0
  26. data/lib/wukong/extensions/date_time.rb +33 -7
  27. data/lib/wukong/extensions/emittable.rb +12 -25
  28. data/lib/wukong/extensions/hash_like.rb +13 -6
  29. data/lib/wukong/filename_pattern.rb +8 -7
  30. data/lib/wukong/schema.rb +47 -0
  31. data/lib/wukong/script.rb +7 -0
  32. data/lib/wukong/script/cassandra_loader_script.rb +40 -0
  33. data/lib/wukong/script/emr_command.rb +74 -43
  34. data/lib/wukong/script/hadoop_command.rb +89 -72
  35. data/lib/wukong/store.rb +2 -7
  36. data/lib/wukong/store/cassandra.rb +10 -0
  37. data/lib/wukong/store/cassandra/streaming.rb +75 -0
  38. data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
  39. data/lib/wukong/store/cassandra_model.rb +90 -0
  40. data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
  41. data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
  42. data/wukong.gemspec +32 -4
  43. metadata +33 -14
@@ -1,15 +1,10 @@
1
1
  module Wukong
2
2
  module Store
3
- # extend FactoryModule
4
3
  autoload :Base, 'wukong/store/base'
5
4
  autoload :FlatFileStore, 'wukong/store/flat_file_store'
6
- # autoload :ConditionalStore, 'monkeyshines/store/conditional_store'
7
5
  autoload :ChunkedFlatFileStore, 'wukong/store/chunked_flat_file_store'
8
6
  autoload :ChhChunkedFlatFileStore, 'wukong/store/chh_chunked_flat_file_store'
9
- # autoload :KeyStore, 'monkeyshines/store/key_store'
10
- # autoload :TokyoTdbKeyStore, 'monkeyshines/store/tokyo_tdb_key_store'
11
- # autoload :TyrantTdbKeyStore, 'monkeyshines/store/tyrant_tdb_key_store'
12
- # autoload :TyrantRdbKeyStore, 'monkeyshines/store/tyrant_rdb_key_store'
13
- # autoload :ReadThruStore, 'monkeyshines/store/read_thru_store'
7
+
8
+ autoload :CassandraModel, 'wukong/store/cassandra_model'
14
9
  end
15
10
  end
@@ -0,0 +1,10 @@
1
+ Settings.define :cassandra_hosts, :default => '127.0.0.1:9160', :type => Array, :description => 'Comma-delimited list of hostname:port addresses for the Cassandra database holding Twitter API objects'
2
+ Settings.define :cassandra_keyspace, :default => 'soc_net_tw', :description => 'Cassandra keyspace for Twitter objects'
3
+
4
+ module Wukong
5
+ module Store
6
+ module CassandraStore
7
+ autoload :StructLoader, 'wukong/store/cassandra/struct_loader'
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,75 @@
1
+ require 'avro'
2
+
3
+ Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
4
+ module Wukong::Store::CassandraModel
5
+
6
+ #
7
+ # Store model using avro writer
8
+ #
9
+ def streaming_save
10
+ self.class.streaming_insert id, self
11
+ end
12
+ module ClassMethods
13
+
14
+ def streaming_writer
15
+ @streaming_writer ||= AvroWriter.new
16
+ end
17
+
18
+ #
19
+ # Use avro and stream into cassandra
20
+ #
21
+ def streaming_insert id, hsh
22
+ streaming_writer.put(id.to_s, hsh.to_db_hash)
23
+ end
24
+ end
25
+ class AvroWriter
26
+ #
27
+ # Reads in the protocol schema
28
+ # creates the necessary encoder and writer.
29
+ #
30
+ def initialize
31
+ schema_file = Settings.cassandra_avro_schema
32
+ @proto = Avro::Protocol.parse(File.read(schema_file))
33
+ @schema = @proto.types.detect{|schema| schema.name == 'StreamingMutation'}
34
+ @enc = Avro::IO::BinaryEncoder.new($stdout)
35
+ # @enc = DummyEncoder.new($stdout)
36
+ @writer = Avro::IO::DatumWriter.new(@schema)
37
+ # warn [@schema, @enc].inspect
38
+ end
39
+
40
+ def write key, col_name, value
41
+ @writer.write(smutation(key, col_name, value), @enc)
42
+ end
43
+
44
+ def write_directly key, col_name, value, timestamp, ttl
45
+ # Log.info "Insert(row_key => #{key}, col_name => #{col_name}, value => #{value}"
46
+ @enc.write_bytes(key)
47
+ @enc.write_bytes(col_name)
48
+ @enc.write_bytes(value)
49
+ @enc.write_long(timestamp)
50
+ @enc.write_int(ttl)
51
+ end
52
+
53
+ #
54
+ # Iterate through each key value pair in the hash to
55
+ # be inserted and write directly one at a time
56
+ #
57
+ def put id, hsh, timestamp=nil, ttl=0
58
+ timestamp ||= Time.now.to_i
59
+ hsh.each do |attr, val|
60
+ write_directly(id, attr, val, timestamp, ttl)
61
+ end
62
+ end
63
+
64
+ def smutation key, name, value
65
+ {
66
+ 'key' => key,
67
+ 'name' => name.to_s,
68
+ 'value' => value.to_s,
69
+ 'timestamp' => Time.epoch_microseconds,
70
+ 'ttl' => 0
71
+ }
72
+ end
73
+ end
74
+
75
+ end
@@ -0,0 +1,21 @@
1
+ require 'avro'
2
+
3
+ Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
4
+
5
+ module Wukong::Store::Cassandra
6
+ class StructLoader < Wukong::Streamer::StructStreamer
7
+ def initialize *args
8
+ super(*args)
9
+ @log = PeriodicMonitor.new
10
+ end
11
+
12
+ #
13
+ # Blindly expects objects streaming by to have a "streaming_save" method
14
+ #
15
+ def process object, *_
16
+ # object.save
17
+ object.streaming_save
18
+ @log.periodically(object.to_flat)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,90 @@
1
+ module Wukong
2
+ module Store
3
+ #
4
+ # Barebones interface between a wukong class and a cassandra database
5
+ #
6
+ # Class must somehow provide a class-level cassandra_db accessor
7
+ # that sets the @cassandra_db instance variable.
8
+ #
9
+ module CassandraModel
10
+ #
11
+ # Store model to the DB
12
+ #
13
+ def save
14
+ self.class.insert key, self.to_db_hash
15
+ end
16
+
17
+ #
18
+ # Flatten attributes for storage in the DB.
19
+ #
20
+ # * omits elements whose value is nil
21
+ # * calls to_s on everything else
22
+ # * This means that blank strings are preserved;
23
+ # * and that false is saved as 'false'
24
+ #
25
+ # Override if you think something fancier than that should happen.
26
+ #
27
+ def to_db_hash
28
+ db_hsh = {}
29
+ to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
30
+ db_hsh
31
+ end
32
+
33
+ module ClassMethods
34
+ # Cassandra column family -- taken from the class name by default.
35
+ def table_name
36
+ class_basename
37
+ end
38
+
39
+ # Override to control how your class is instantiated from the DB hash
40
+ def from_db_hash *args
41
+ from_hash *args
42
+ end
43
+
44
+ # Insert into the cassandra database
45
+ # uses object's #to_db_hash method
46
+ def insert key, *args
47
+ hsh = args.first
48
+ cassandra_db.insert(table_name, key.to_s, hsh)
49
+ end
50
+
51
+ # Load from the cassandra database
52
+ # calls out to object's #from_db_hash method
53
+ def load key
54
+ hsh = cassandra_db.get(self.class_basename, key.to_s)
55
+ from_db_hash(hsh) if hsh
56
+ end
57
+
58
+ # invalidates cassandra connection on errors where that makes sense.
59
+ def handle_error action, e
60
+ warn "#{action} failed: #{e} #{e.backtrace.join("\t")}" ;
61
+ @cassandra_db = nil
62
+ sleep 0.2
63
+ end
64
+ end
65
+ # The standard 'inject class methods when module is included' trick
66
+ def self.included base
67
+ base.class_eval{ extend ClassMethods}
68
+ end
69
+ end
70
+
71
+ end
72
+ end
73
+
74
+ Hash.class_eval do
75
+ #
76
+ # Flatten attributes for storage in the DB.
77
+ #
78
+ # * omits elements whose value is nil
79
+ # * calls to_s on everything else
80
+ # * This means that blank strings are preserved;
81
+ # * and that false is saved as 'false'
82
+ #
83
+ # Override if you think something fancier than that should happen.
84
+ #
85
+ def to_db_hash
86
+ db_hsh = {}
87
+ to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
88
+ db_hsh
89
+ end
90
+ end
@@ -12,7 +12,7 @@ module Wukong
12
12
  def initialize options={}
13
13
  # super wants a :filename in the options or it will fail. We need to get the initial filename
14
14
  # set up before we call super, so we need all of the parts of the pattern set up.
15
- self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
15
+ self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
16
16
  self.handle = options[:handle]
17
17
  pattern = options[:pattern] || Settings[:chunk_file_pattern]
18
18
  self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
@@ -1,41 +1,45 @@
1
+ require 'wukong/monitor/periodic_monitor'
1
2
  module Wukong
2
3
  module Store
3
4
  class ChunkedFlatFileStore < Wukong::Store::FlatFileStore
4
5
  attr_accessor :filename_pattern, :chunk_monitor, :handle, :chunktime, :rootdir
5
6
 
6
7
  # Move to configliere
7
- Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle:timestamp-:pid.tsv",:description => "The pattern for chunked files."
8
- Settings.define :chunk_file_chunktime, :default => 4*60*60,:description => "The time interval to keep a chunk file open."
9
- Settings.define :chunk_file_rootdir, :default => nil, :description => "The root directory for the chunked files."
10
-
8
+ Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle-:timestamp-:pid.tsv",:description => "The pattern for chunked files."
9
+ Settings.define :chunk_file_interval, :default => 4*60*60, :description => "The time interval to keep a chunk file open."
10
+ Settings.define :chunk_file_rootdir, :default => '/tmp', :description => "The root directory for the chunked files."
11
+
11
12
  #Note that filemode is inherited from flat_file
12
13
 
13
14
  def initialize options={}
14
15
  # super wants a :filename in the options or it will fail. We need to get the initial filename
15
- # set up before we call super, so we need all of the parts of the pattern set up.
16
- self.chunktime = options[:chunktime] || Settings[:chunk_file_chunktime]
17
- self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
18
- self.handle = options[:handle]
19
- pattern = options[:pattern] || Settings[:chunk_file_pattern]
16
+ # set up before we call super, so we need all of the parts of the pattern set up.
17
+ self.chunktime = options[:interval] || Settings[:chunk_file_interval]
18
+ self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
19
+ self.handle = options[:handle]
20
+ pattern = options[:pattern] || Settings[:chunk_file_pattern]
20
21
  self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
21
- options[:filename] = filename_pattern.make()
22
-
22
+ options[:filename] = filename_pattern.make()
23
+ options[:filemode] ||= 'a'
24
+ Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
25
+ self.chunk_monitor = Wukong::Monitor::PeriodicMonitor.new( :time => self.chunktime )
26
+
23
27
  super options
28
+ self.mkdir!
29
+ end
24
30
 
25
- Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
26
- self.chunk_monitor = Wukong::PeriodicMonitor.new( :time => self.chunktime )
31
+ def new_chunk!
32
+ new_filename = filename_pattern.make()
33
+ Log.info "Rotating chunked file #{filename} into #{new_filename}"
34
+ self.flush
35
+ self.close
36
+ @filename = new_filename
27
37
  self.mkdir!
28
38
  end
29
39
 
30
40
  def save *args
31
41
  result = super *args
32
- chunk_monitor.periodically do
33
- new_filename = filename_pattern.make()
34
- Log.info "Rotating chunked file #{filename} into #{new_filename}"
35
- self.close
36
- @filename = new_filename
37
- self.mkdir!
38
- end
42
+ chunk_monitor.periodically{ new_chunk! }
39
43
  result
40
44
  end
41
45
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.5.3"
8
+ s.version = "1.5.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2010-08-19}
12
+ s.date = %q{2010-11-02}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it's efficient to process by lines
@@ -30,10 +30,10 @@ Gem::Specification.new do |s|
30
30
  "LICENSE.textile",
31
31
  "README.textile",
32
32
  "TODO.textile",
33
- "bin/bootstrap.sh",
34
33
  "bin/cutc",
35
34
  "bin/cuttab",
36
35
  "bin/greptrue",
36
+ "bin/hdp-bin",
37
37
  "bin/hdp-bzip",
38
38
  "bin/hdp-cat",
39
39
  "bin/hdp-catd",
@@ -75,6 +75,7 @@ Gem::Specification.new do |s|
75
75
  "docpages/UsingWukong-part3-parsing.textile",
76
76
  "docpages/_config.yml",
77
77
  "docpages/avro/avro_notes.textile",
78
+ "docpages/avro/performance.textile",
78
79
  "docpages/avro/tethering.textile",
79
80
  "docpages/bigdata-tips.textile",
80
81
  "docpages/code/api_response_example.txt",
@@ -129,6 +130,21 @@ Gem::Specification.new do |s|
129
130
  "docpages/wutils.textile",
130
131
  "examples/README.txt",
131
132
  "examples/binning_percentile_estimator.rb",
133
+ "examples/cassandra_streaming/avromapper.rb",
134
+ "examples/cassandra_streaming/berlitz_for_cassandra.textile",
135
+ "examples/cassandra_streaming/cassandra.avpr",
136
+ "examples/cassandra_streaming/cassandra_random_partitioner.rb",
137
+ "examples/cassandra_streaming/catter.sh",
138
+ "examples/cassandra_streaming/client_interface_notes.textile",
139
+ "examples/cassandra_streaming/client_schema.avpr",
140
+ "examples/cassandra_streaming/client_schema.textile",
141
+ "examples/cassandra_streaming/foofile.avr",
142
+ "examples/cassandra_streaming/pymap.sh",
143
+ "examples/cassandra_streaming/pyreduce.sh",
144
+ "examples/cassandra_streaming/smutation.avpr",
145
+ "examples/cassandra_streaming/streamer.sh",
146
+ "examples/cassandra_streaming/struct_loader.rb",
147
+ "examples/cassandra_streaming/tuning.textile",
132
148
  "examples/contrib/jeans/README.markdown",
133
149
  "examples/contrib/jeans/data/normalized_sizes",
134
150
  "examples/contrib/jeans/data/orders.tsv",
@@ -138,8 +154,11 @@ Gem::Specification.new do |s|
138
154
  "examples/corpus/words_to_bigrams.rb",
139
155
  "examples/count_keys.rb",
140
156
  "examples/count_keys_at_mapper.rb",
157
+ "examples/emr/README-elastic_map_reduce.textile",
158
+ "examples/emr/dot_wukong_dir/credentials.json",
159
+ "examples/emr/dot_wukong_dir/emr.yaml",
160
+ "examples/emr/dot_wukong_dir/emr_bootstrap.sh",
141
161
  "examples/emr/elastic_mapreduce_example.rb",
142
- "examples/emr/emr.yaml",
143
162
  "examples/keystore/cassandra_batch_test.rb",
144
163
  "examples/keystore/conditional_outputter_example.rb",
145
164
  "examples/network_graph/adjacency_list.rb",
@@ -171,6 +190,7 @@ Gem::Specification.new do |s|
171
190
  "lib/wukong/datatypes/fake_types.rb",
172
191
  "lib/wukong/dfs.rb",
173
192
  "lib/wukong/encoding.rb",
193
+ "lib/wukong/encoding/asciize.rb",
174
194
  "lib/wukong/extensions.rb",
175
195
  "lib/wukong/extensions/array.rb",
176
196
  "lib/wukong/extensions/blank.rb",
@@ -203,11 +223,16 @@ Gem::Specification.new do |s|
203
223
  "lib/wukong/schema.rb",
204
224
  "lib/wukong/script.rb",
205
225
  "lib/wukong/script/avro_command.rb",
226
+ "lib/wukong/script/cassandra_loader_script.rb",
206
227
  "lib/wukong/script/emr_command.rb",
207
228
  "lib/wukong/script/hadoop_command.rb",
208
229
  "lib/wukong/script/local_command.rb",
209
230
  "lib/wukong/store.rb",
210
231
  "lib/wukong/store/base.rb",
232
+ "lib/wukong/store/cassandra.rb",
233
+ "lib/wukong/store/cassandra/streaming.rb",
234
+ "lib/wukong/store/cassandra/struct_loader.rb",
235
+ "lib/wukong/store/cassandra_model.rb",
211
236
  "lib/wukong/store/chh_chunked_flat_file_store.rb",
212
237
  "lib/wukong/store/chunked_flat_file_store.rb",
213
238
  "lib/wukong/store/conditional_store.rb",
@@ -259,6 +284,9 @@ Gem::Specification.new do |s|
259
284
  "spec/wukong/encoding_spec.rb",
260
285
  "spec/wukong/script_spec.rb",
261
286
  "examples/binning_percentile_estimator.rb",
287
+ "examples/cassandra_streaming/avromapper.rb",
288
+ "examples/cassandra_streaming/cassandra_random_partitioner.rb",
289
+ "examples/cassandra_streaming/struct_loader.rb",
262
290
  "examples/contrib/jeans/normalize.rb",
263
291
  "examples/contrib/jeans/sizes.rb",
264
292
  "examples/corpus/words_to_bigrams.rb",
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
5
4
  prerelease: false
6
5
  segments:
7
6
  - 1
8
7
  - 5
9
- - 3
10
- version: 1.5.3
8
+ - 4
9
+ version: 1.5.4
11
10
  platform: ruby
12
11
  authors:
13
12
  - Philip (flip) Kromer
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-08-19 00:00:00 -05:00
17
+ date: 2010-11-02 00:00:00 -05:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -26,7 +25,6 @@ dependencies:
26
25
  requirements:
27
26
  - - ">="
28
27
  - !ruby/object:Gem::Version
29
- hash: 13
30
28
  segments:
31
29
  - 1
32
30
  - 2
@@ -42,7 +40,6 @@ dependencies:
42
40
  requirements:
43
41
  - - ">="
44
42
  - !ruby/object:Gem::Version
45
- hash: 3
46
43
  segments:
47
44
  - 0
48
45
  version: "0"
@@ -56,7 +53,6 @@ dependencies:
56
53
  requirements:
57
54
  - - ">="
58
55
  - !ruby/object:Gem::Version
59
- hash: 3
60
56
  segments:
61
57
  - 0
62
58
  version: "0"
@@ -70,7 +66,6 @@ dependencies:
70
66
  requirements:
71
67
  - - ">="
72
68
  - !ruby/object:Gem::Version
73
- hash: 3
74
69
  segments:
75
70
  - 0
76
71
  version: "0"
@@ -84,7 +79,6 @@ dependencies:
84
79
  requirements:
85
80
  - - ">="
86
81
  - !ruby/object:Gem::Version
87
- hash: 3
88
82
  segments:
89
83
  - 0
90
84
  version: "0"
@@ -98,7 +92,6 @@ dependencies:
98
92
  requirements:
99
93
  - - ">="
100
94
  - !ruby/object:Gem::Version
101
- hash: 3
102
95
  segments:
103
96
  - 0
104
97
  version: "0"
@@ -124,10 +117,10 @@ files:
124
117
  - LICENSE.textile
125
118
  - README.textile
126
119
  - TODO.textile
127
- - bin/bootstrap.sh
128
120
  - bin/cutc
129
121
  - bin/cuttab
130
122
  - bin/greptrue
123
+ - bin/hdp-bin
131
124
  - bin/hdp-bzip
132
125
  - bin/hdp-cat
133
126
  - bin/hdp-catd
@@ -169,6 +162,7 @@ files:
169
162
  - docpages/UsingWukong-part3-parsing.textile
170
163
  - docpages/_config.yml
171
164
  - docpages/avro/avro_notes.textile
165
+ - docpages/avro/performance.textile
172
166
  - docpages/avro/tethering.textile
173
167
  - docpages/bigdata-tips.textile
174
168
  - docpages/code/api_response_example.txt
@@ -223,6 +217,21 @@ files:
223
217
  - docpages/wutils.textile
224
218
  - examples/README.txt
225
219
  - examples/binning_percentile_estimator.rb
220
+ - examples/cassandra_streaming/avromapper.rb
221
+ - examples/cassandra_streaming/berlitz_for_cassandra.textile
222
+ - examples/cassandra_streaming/cassandra.avpr
223
+ - examples/cassandra_streaming/cassandra_random_partitioner.rb
224
+ - examples/cassandra_streaming/catter.sh
225
+ - examples/cassandra_streaming/client_interface_notes.textile
226
+ - examples/cassandra_streaming/client_schema.avpr
227
+ - examples/cassandra_streaming/client_schema.textile
228
+ - examples/cassandra_streaming/foofile.avr
229
+ - examples/cassandra_streaming/pymap.sh
230
+ - examples/cassandra_streaming/pyreduce.sh
231
+ - examples/cassandra_streaming/smutation.avpr
232
+ - examples/cassandra_streaming/streamer.sh
233
+ - examples/cassandra_streaming/struct_loader.rb
234
+ - examples/cassandra_streaming/tuning.textile
226
235
  - examples/contrib/jeans/README.markdown
227
236
  - examples/contrib/jeans/data/normalized_sizes
228
237
  - examples/contrib/jeans/data/orders.tsv
@@ -232,8 +241,11 @@ files:
232
241
  - examples/corpus/words_to_bigrams.rb
233
242
  - examples/count_keys.rb
234
243
  - examples/count_keys_at_mapper.rb
244
+ - examples/emr/README-elastic_map_reduce.textile
245
+ - examples/emr/dot_wukong_dir/credentials.json
246
+ - examples/emr/dot_wukong_dir/emr.yaml
247
+ - examples/emr/dot_wukong_dir/emr_bootstrap.sh
235
248
  - examples/emr/elastic_mapreduce_example.rb
236
- - examples/emr/emr.yaml
237
249
  - examples/keystore/cassandra_batch_test.rb
238
250
  - examples/keystore/conditional_outputter_example.rb
239
251
  - examples/network_graph/adjacency_list.rb
@@ -265,6 +277,7 @@ files:
265
277
  - lib/wukong/datatypes/fake_types.rb
266
278
  - lib/wukong/dfs.rb
267
279
  - lib/wukong/encoding.rb
280
+ - lib/wukong/encoding/asciize.rb
268
281
  - lib/wukong/extensions.rb
269
282
  - lib/wukong/extensions/array.rb
270
283
  - lib/wukong/extensions/blank.rb
@@ -297,11 +310,16 @@ files:
297
310
  - lib/wukong/schema.rb
298
311
  - lib/wukong/script.rb
299
312
  - lib/wukong/script/avro_command.rb
313
+ - lib/wukong/script/cassandra_loader_script.rb
300
314
  - lib/wukong/script/emr_command.rb
301
315
  - lib/wukong/script/hadoop_command.rb
302
316
  - lib/wukong/script/local_command.rb
303
317
  - lib/wukong/store.rb
304
318
  - lib/wukong/store/base.rb
319
+ - lib/wukong/store/cassandra.rb
320
+ - lib/wukong/store/cassandra/streaming.rb
321
+ - lib/wukong/store/cassandra/struct_loader.rb
322
+ - lib/wukong/store/cassandra_model.rb
305
323
  - lib/wukong/store/chh_chunked_flat_file_store.rb
306
324
  - lib/wukong/store/chunked_flat_file_store.rb
307
325
  - lib/wukong/store/conditional_store.rb
@@ -356,7 +374,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
356
374
  requirements:
357
375
  - - ">="
358
376
  - !ruby/object:Gem::Version
359
- hash: 3
360
377
  segments:
361
378
  - 0
362
379
  version: "0"
@@ -365,7 +382,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
365
382
  requirements:
366
383
  - - ">="
367
384
  - !ruby/object:Gem::Version
368
- hash: 3
369
385
  segments:
370
386
  - 0
371
387
  version: "0"
@@ -381,6 +397,9 @@ test_files:
381
397
  - spec/wukong/encoding_spec.rb
382
398
  - spec/wukong/script_spec.rb
383
399
  - examples/binning_percentile_estimator.rb
400
+ - examples/cassandra_streaming/avromapper.rb
401
+ - examples/cassandra_streaming/cassandra_random_partitioner.rb
402
+ - examples/cassandra_streaming/struct_loader.rb
384
403
  - examples/contrib/jeans/normalize.rb
385
404
  - examples/contrib/jeans/sizes.rb
386
405
  - examples/corpus/words_to_bigrams.rb