wukong 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/CHANGELOG.textile +4 -0
  2. data/bin/hdp-bin +44 -0
  3. data/bin/hdp-ls +2 -1
  4. data/docpages/avro/performance.textile +36 -0
  5. data/examples/cassandra_streaming/avromapper.rb +85 -0
  6. data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
  7. data/examples/cassandra_streaming/cassandra.avpr +468 -0
  8. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
  9. data/examples/cassandra_streaming/catter.sh +45 -0
  10. data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
  11. data/examples/cassandra_streaming/client_schema.avpr +211 -0
  12. data/examples/cassandra_streaming/client_schema.textile +318 -0
  13. data/examples/cassandra_streaming/foofile.avr +0 -0
  14. data/examples/cassandra_streaming/pymap.sh +1 -0
  15. data/examples/cassandra_streaming/pyreduce.sh +1 -0
  16. data/examples/cassandra_streaming/smutation.avpr +188 -0
  17. data/examples/cassandra_streaming/streamer.sh +51 -0
  18. data/examples/cassandra_streaming/struct_loader.rb +24 -0
  19. data/examples/cassandra_streaming/tuning.textile +73 -0
  20. data/examples/emr/README-elastic_map_reduce.textile +26 -0
  21. data/examples/emr/dot_wukong_dir/credentials.json +7 -0
  22. data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
  23. data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
  24. data/examples/emr/elastic_mapreduce_example.rb +1 -0
  25. data/lib/wukong/encoding/asciize.rb +108 -0
  26. data/lib/wukong/extensions/date_time.rb +33 -7
  27. data/lib/wukong/extensions/emittable.rb +12 -25
  28. data/lib/wukong/extensions/hash_like.rb +13 -6
  29. data/lib/wukong/filename_pattern.rb +8 -7
  30. data/lib/wukong/schema.rb +47 -0
  31. data/lib/wukong/script.rb +7 -0
  32. data/lib/wukong/script/cassandra_loader_script.rb +40 -0
  33. data/lib/wukong/script/emr_command.rb +74 -43
  34. data/lib/wukong/script/hadoop_command.rb +89 -72
  35. data/lib/wukong/store.rb +2 -7
  36. data/lib/wukong/store/cassandra.rb +10 -0
  37. data/lib/wukong/store/cassandra/streaming.rb +75 -0
  38. data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
  39. data/lib/wukong/store/cassandra_model.rb +90 -0
  40. data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
  41. data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
  42. data/wukong.gemspec +32 -4
  43. metadata +33 -14
@@ -1,15 +1,10 @@
1
1
  module Wukong
2
2
  module Store
3
- # extend FactoryModule
4
3
  autoload :Base, 'wukong/store/base'
5
4
  autoload :FlatFileStore, 'wukong/store/flat_file_store'
6
- # autoload :ConditionalStore, 'monkeyshines/store/conditional_store'
7
5
  autoload :ChunkedFlatFileStore, 'wukong/store/chunked_flat_file_store'
8
6
  autoload :ChhChunkedFlatFileStore, 'wukong/store/chh_chunked_flat_file_store'
9
- # autoload :KeyStore, 'monkeyshines/store/key_store'
10
- # autoload :TokyoTdbKeyStore, 'monkeyshines/store/tokyo_tdb_key_store'
11
- # autoload :TyrantTdbKeyStore, 'monkeyshines/store/tyrant_tdb_key_store'
12
- # autoload :TyrantRdbKeyStore, 'monkeyshines/store/tyrant_rdb_key_store'
13
- # autoload :ReadThruStore, 'monkeyshines/store/read_thru_store'
7
+
8
+ autoload :CassandraModel, 'wukong/store/cassandra_model'
14
9
  end
15
10
  end
@@ -0,0 +1,10 @@
1
+ Settings.define :cassandra_hosts, :default => '127.0.0.1:9160', :type => Array, :description => 'Comma-delimited list of hostname:port addresses for the Cassandra database holding Twitter API objects'
2
+ Settings.define :cassandra_keyspace, :default => 'soc_net_tw', :description => 'Cassandra keyspace for Twitter objects'
3
+
4
+ module Wukong
5
+ module Store
6
+ module CassandraStore
7
+ autoload :StructLoader, 'wukong/store/cassandra/struct_loader'
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,75 @@
1
+ require 'avro'
2
+
3
+ Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
4
+ module Wukong::Store::CassandraModel
5
+
6
+ #
7
+ # Store model using avro writer
8
+ #
9
+ def streaming_save
10
+ self.class.streaming_insert id, self
11
+ end
12
+ module ClassMethods
13
+
14
+ def streaming_writer
15
+ @streaming_writer ||= AvroWriter.new
16
+ end
17
+
18
+ #
19
+ # Use avro and stream into cassandra
20
+ #
21
+ def streaming_insert id, hsh
22
+ streaming_writer.put(id.to_s, hsh.to_db_hash)
23
+ end
24
+ end
25
+ class AvroWriter
26
+ #
27
+ # Reads in the protocol schema
28
+ # creates the necessary encoder and writer.
29
+ #
30
+ def initialize
31
+ schema_file = Settings.cassandra_avro_schema
32
+ @proto = Avro::Protocol.parse(File.read(schema_file))
33
+ @schema = @proto.types.detect{|schema| schema.name == 'StreamingMutation'}
34
+ @enc = Avro::IO::BinaryEncoder.new($stdout)
35
+ # @enc = DummyEncoder.new($stdout)
36
+ @writer = Avro::IO::DatumWriter.new(@schema)
37
+ # warn [@schema, @enc].inspect
38
+ end
39
+
40
+ def write key, col_name, value
41
+ @writer.write(smutation(key, col_name, value), @enc)
42
+ end
43
+
44
+ def write_directly key, col_name, value, timestamp, ttl
45
+ # Log.info "Insert(row_key => #{key}, col_name => #{col_name}, value => #{value}"
46
+ @enc.write_bytes(key)
47
+ @enc.write_bytes(col_name)
48
+ @enc.write_bytes(value)
49
+ @enc.write_long(timestamp)
50
+ @enc.write_int(ttl)
51
+ end
52
+
53
+ #
54
+ # Iterate through each key value pair in the hash to
55
+ # be inserted and write directly one at a time
56
+ #
57
+ def put id, hsh, timestamp=nil, ttl=0
58
+ timestamp ||= Time.now.to_i
59
+ hsh.each do |attr, val|
60
+ write_directly(id, attr, val, timestamp, ttl)
61
+ end
62
+ end
63
+
64
+ def smutation key, name, value
65
+ {
66
+ 'key' => key,
67
+ 'name' => name.to_s,
68
+ 'value' => value.to_s,
69
+ 'timestamp' => Time.epoch_microseconds,
70
+ 'ttl' => 0
71
+ }
72
+ end
73
+ end
74
+
75
+ end
@@ -0,0 +1,21 @@
1
+ require 'avro'
2
+
3
+ Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
4
+
5
+ module Wukong::Store::Cassandra
6
+ class StructLoader < Wukong::Streamer::StructStreamer
7
+ def initialize *args
8
+ super(*args)
9
+ @log = PeriodicMonitor.new
10
+ end
11
+
12
+ #
13
+ # Blindly expects objects streaming by to have a "streaming_save" method
14
+ #
15
+ def process object, *_
16
+ # object.save
17
+ object.streaming_save
18
+ @log.periodically(object.to_flat)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,90 @@
1
+ module Wukong
2
+ module Store
3
+ #
4
+ # Barebones interface between a wukong class and a cassandra database
5
+ #
6
+ # Class must somehow provide a class-level cassandra_db accessor
7
+ # that sets the @cassandra_db instance variable.
8
+ #
9
+ module CassandraModel
10
+ #
11
+ # Store model to the DB
12
+ #
13
+ def save
14
+ self.class.insert key, self.to_db_hash
15
+ end
16
+
17
+ #
18
+ # Flatten attributes for storage in the DB.
19
+ #
20
+ # * omits elements whose value is nil
21
+ # * calls to_s on everything else
22
+ # * This means that blank strings are preserved;
23
+ # * and that false is saved as 'false'
24
+ #
25
+ # Override if you think something fancier than that should happen.
26
+ #
27
+ def to_db_hash
28
+ db_hsh = {}
29
+ to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
30
+ db_hsh
31
+ end
32
+
33
+ module ClassMethods
34
+ # Cassandra column family -- taken from the class name by default.
35
+ def table_name
36
+ class_basename
37
+ end
38
+
39
+ # Override to control how your class is instantiated from the DB hash
40
+ def from_db_hash *args
41
+ from_hash *args
42
+ end
43
+
44
+ # Insert into the cassandra database
45
+ # uses object's #to_db_hash method
46
+ def insert key, *args
47
+ hsh = args.first
48
+ cassandra_db.insert(table_name, key.to_s, hsh)
49
+ end
50
+
51
+ # Load from the cassandra database
52
+ # calls out to object's #from_db_hash method
53
+ def load key
54
+ hsh = cassandra_db.get(self.class_basename, key.to_s)
55
+ from_db_hash(hsh) if hsh
56
+ end
57
+
58
+ # invalidates cassandra connection on errors where that makes sense.
59
+ def handle_error action, e
60
+ warn "#{action} failed: #{e} #{e.backtrace.join("\t")}" ;
61
+ @cassandra_db = nil
62
+ sleep 0.2
63
+ end
64
+ end
65
+ # The standard 'inject class methods when module is included' trick
66
+ def self.included base
67
+ base.class_eval{ extend ClassMethods}
68
+ end
69
+ end
70
+
71
+ end
72
+ end
73
+
74
+ Hash.class_eval do
75
+ #
76
+ # Flatten attributes for storage in the DB.
77
+ #
78
+ # * omits elements whose value is nil
79
+ # * calls to_s on everything else
80
+ # * This means that blank strings are preserved;
81
+ # * and that false is saved as 'false'
82
+ #
83
+ # Override if you think something fancier than that should happen.
84
+ #
85
+ def to_db_hash
86
+ db_hsh = {}
87
+ to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
88
+ db_hsh
89
+ end
90
+ end
@@ -12,7 +12,7 @@ module Wukong
12
12
  def initialize options={}
13
13
  # super wants a :filename in the options or it will fail. We need to get the initial filename
14
14
  # set up before we call super, so we need all of the parts of the pattern set up.
15
- self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
15
+ self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
16
16
  self.handle = options[:handle]
17
17
  pattern = options[:pattern] || Settings[:chunk_file_pattern]
18
18
  self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
@@ -1,41 +1,45 @@
1
+ require 'wukong/monitor/periodic_monitor'
1
2
  module Wukong
2
3
  module Store
3
4
  class ChunkedFlatFileStore < Wukong::Store::FlatFileStore
4
5
  attr_accessor :filename_pattern, :chunk_monitor, :handle, :chunktime, :rootdir
5
6
 
6
7
  # Move to configliere
7
- Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle:timestamp-:pid.tsv",:description => "The pattern for chunked files."
8
- Settings.define :chunk_file_chunktime, :default => 4*60*60,:description => "The time interval to keep a chunk file open."
9
- Settings.define :chunk_file_rootdir, :default => nil, :description => "The root directory for the chunked files."
10
-
8
+ Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle-:timestamp-:pid.tsv",:description => "The pattern for chunked files."
9
+ Settings.define :chunk_file_interval, :default => 4*60*60, :description => "The time interval to keep a chunk file open."
10
+ Settings.define :chunk_file_rootdir, :default => '/tmp', :description => "The root directory for the chunked files."
11
+
11
12
  #Note that filemode is inherited from flat_file
12
13
 
13
14
  def initialize options={}
14
15
  # super wants a :filename in the options or it will fail. We need to get the initial filename
15
- # set up before we call super, so we need all of the parts of the pattern set up.
16
- self.chunktime = options[:chunktime] || Settings[:chunk_file_chunktime]
17
- self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
18
- self.handle = options[:handle]
19
- pattern = options[:pattern] || Settings[:chunk_file_pattern]
16
+ # set up before we call super, so we need all of the parts of the pattern set up.
17
+ self.chunktime = options[:interval] || Settings[:chunk_file_interval]
18
+ self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
19
+ self.handle = options[:handle]
20
+ pattern = options[:pattern] || Settings[:chunk_file_pattern]
20
21
  self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
21
- options[:filename] = filename_pattern.make()
22
-
22
+ options[:filename] = filename_pattern.make()
23
+ options[:filemode] ||= 'a'
24
+ Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
25
+ self.chunk_monitor = Wukong::Monitor::PeriodicMonitor.new( :time => self.chunktime )
26
+
23
27
  super options
28
+ self.mkdir!
29
+ end
24
30
 
25
- Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
26
- self.chunk_monitor = Wukong::PeriodicMonitor.new( :time => self.chunktime )
31
+ def new_chunk!
32
+ new_filename = filename_pattern.make()
33
+ Log.info "Rotating chunked file #{filename} into #{new_filename}"
34
+ self.flush
35
+ self.close
36
+ @filename = new_filename
27
37
  self.mkdir!
28
38
  end
29
39
 
30
40
  def save *args
31
41
  result = super *args
32
- chunk_monitor.periodically do
33
- new_filename = filename_pattern.make()
34
- Log.info "Rotating chunked file #{filename} into #{new_filename}"
35
- self.close
36
- @filename = new_filename
37
- self.mkdir!
38
- end
42
+ chunk_monitor.periodically{ new_chunk! }
39
43
  result
40
44
  end
41
45
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.5.3"
8
+ s.version = "1.5.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2010-08-19}
12
+ s.date = %q{2010-11-02}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it's efficient to process by lines
@@ -30,10 +30,10 @@ Gem::Specification.new do |s|
30
30
  "LICENSE.textile",
31
31
  "README.textile",
32
32
  "TODO.textile",
33
- "bin/bootstrap.sh",
34
33
  "bin/cutc",
35
34
  "bin/cuttab",
36
35
  "bin/greptrue",
36
+ "bin/hdp-bin",
37
37
  "bin/hdp-bzip",
38
38
  "bin/hdp-cat",
39
39
  "bin/hdp-catd",
@@ -75,6 +75,7 @@ Gem::Specification.new do |s|
75
75
  "docpages/UsingWukong-part3-parsing.textile",
76
76
  "docpages/_config.yml",
77
77
  "docpages/avro/avro_notes.textile",
78
+ "docpages/avro/performance.textile",
78
79
  "docpages/avro/tethering.textile",
79
80
  "docpages/bigdata-tips.textile",
80
81
  "docpages/code/api_response_example.txt",
@@ -129,6 +130,21 @@ Gem::Specification.new do |s|
129
130
  "docpages/wutils.textile",
130
131
  "examples/README.txt",
131
132
  "examples/binning_percentile_estimator.rb",
133
+ "examples/cassandra_streaming/avromapper.rb",
134
+ "examples/cassandra_streaming/berlitz_for_cassandra.textile",
135
+ "examples/cassandra_streaming/cassandra.avpr",
136
+ "examples/cassandra_streaming/cassandra_random_partitioner.rb",
137
+ "examples/cassandra_streaming/catter.sh",
138
+ "examples/cassandra_streaming/client_interface_notes.textile",
139
+ "examples/cassandra_streaming/client_schema.avpr",
140
+ "examples/cassandra_streaming/client_schema.textile",
141
+ "examples/cassandra_streaming/foofile.avr",
142
+ "examples/cassandra_streaming/pymap.sh",
143
+ "examples/cassandra_streaming/pyreduce.sh",
144
+ "examples/cassandra_streaming/smutation.avpr",
145
+ "examples/cassandra_streaming/streamer.sh",
146
+ "examples/cassandra_streaming/struct_loader.rb",
147
+ "examples/cassandra_streaming/tuning.textile",
132
148
  "examples/contrib/jeans/README.markdown",
133
149
  "examples/contrib/jeans/data/normalized_sizes",
134
150
  "examples/contrib/jeans/data/orders.tsv",
@@ -138,8 +154,11 @@ Gem::Specification.new do |s|
138
154
  "examples/corpus/words_to_bigrams.rb",
139
155
  "examples/count_keys.rb",
140
156
  "examples/count_keys_at_mapper.rb",
157
+ "examples/emr/README-elastic_map_reduce.textile",
158
+ "examples/emr/dot_wukong_dir/credentials.json",
159
+ "examples/emr/dot_wukong_dir/emr.yaml",
160
+ "examples/emr/dot_wukong_dir/emr_bootstrap.sh",
141
161
  "examples/emr/elastic_mapreduce_example.rb",
142
- "examples/emr/emr.yaml",
143
162
  "examples/keystore/cassandra_batch_test.rb",
144
163
  "examples/keystore/conditional_outputter_example.rb",
145
164
  "examples/network_graph/adjacency_list.rb",
@@ -171,6 +190,7 @@ Gem::Specification.new do |s|
171
190
  "lib/wukong/datatypes/fake_types.rb",
172
191
  "lib/wukong/dfs.rb",
173
192
  "lib/wukong/encoding.rb",
193
+ "lib/wukong/encoding/asciize.rb",
174
194
  "lib/wukong/extensions.rb",
175
195
  "lib/wukong/extensions/array.rb",
176
196
  "lib/wukong/extensions/blank.rb",
@@ -203,11 +223,16 @@ Gem::Specification.new do |s|
203
223
  "lib/wukong/schema.rb",
204
224
  "lib/wukong/script.rb",
205
225
  "lib/wukong/script/avro_command.rb",
226
+ "lib/wukong/script/cassandra_loader_script.rb",
206
227
  "lib/wukong/script/emr_command.rb",
207
228
  "lib/wukong/script/hadoop_command.rb",
208
229
  "lib/wukong/script/local_command.rb",
209
230
  "lib/wukong/store.rb",
210
231
  "lib/wukong/store/base.rb",
232
+ "lib/wukong/store/cassandra.rb",
233
+ "lib/wukong/store/cassandra/streaming.rb",
234
+ "lib/wukong/store/cassandra/struct_loader.rb",
235
+ "lib/wukong/store/cassandra_model.rb",
211
236
  "lib/wukong/store/chh_chunked_flat_file_store.rb",
212
237
  "lib/wukong/store/chunked_flat_file_store.rb",
213
238
  "lib/wukong/store/conditional_store.rb",
@@ -259,6 +284,9 @@ Gem::Specification.new do |s|
259
284
  "spec/wukong/encoding_spec.rb",
260
285
  "spec/wukong/script_spec.rb",
261
286
  "examples/binning_percentile_estimator.rb",
287
+ "examples/cassandra_streaming/avromapper.rb",
288
+ "examples/cassandra_streaming/cassandra_random_partitioner.rb",
289
+ "examples/cassandra_streaming/struct_loader.rb",
262
290
  "examples/contrib/jeans/normalize.rb",
263
291
  "examples/contrib/jeans/sizes.rb",
264
292
  "examples/corpus/words_to_bigrams.rb",
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
5
4
  prerelease: false
6
5
  segments:
7
6
  - 1
8
7
  - 5
9
- - 3
10
- version: 1.5.3
8
+ - 4
9
+ version: 1.5.4
11
10
  platform: ruby
12
11
  authors:
13
12
  - Philip (flip) Kromer
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-08-19 00:00:00 -05:00
17
+ date: 2010-11-02 00:00:00 -05:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -26,7 +25,6 @@ dependencies:
26
25
  requirements:
27
26
  - - ">="
28
27
  - !ruby/object:Gem::Version
29
- hash: 13
30
28
  segments:
31
29
  - 1
32
30
  - 2
@@ -42,7 +40,6 @@ dependencies:
42
40
  requirements:
43
41
  - - ">="
44
42
  - !ruby/object:Gem::Version
45
- hash: 3
46
43
  segments:
47
44
  - 0
48
45
  version: "0"
@@ -56,7 +53,6 @@ dependencies:
56
53
  requirements:
57
54
  - - ">="
58
55
  - !ruby/object:Gem::Version
59
- hash: 3
60
56
  segments:
61
57
  - 0
62
58
  version: "0"
@@ -70,7 +66,6 @@ dependencies:
70
66
  requirements:
71
67
  - - ">="
72
68
  - !ruby/object:Gem::Version
73
- hash: 3
74
69
  segments:
75
70
  - 0
76
71
  version: "0"
@@ -84,7 +79,6 @@ dependencies:
84
79
  requirements:
85
80
  - - ">="
86
81
  - !ruby/object:Gem::Version
87
- hash: 3
88
82
  segments:
89
83
  - 0
90
84
  version: "0"
@@ -98,7 +92,6 @@ dependencies:
98
92
  requirements:
99
93
  - - ">="
100
94
  - !ruby/object:Gem::Version
101
- hash: 3
102
95
  segments:
103
96
  - 0
104
97
  version: "0"
@@ -124,10 +117,10 @@ files:
124
117
  - LICENSE.textile
125
118
  - README.textile
126
119
  - TODO.textile
127
- - bin/bootstrap.sh
128
120
  - bin/cutc
129
121
  - bin/cuttab
130
122
  - bin/greptrue
123
+ - bin/hdp-bin
131
124
  - bin/hdp-bzip
132
125
  - bin/hdp-cat
133
126
  - bin/hdp-catd
@@ -169,6 +162,7 @@ files:
169
162
  - docpages/UsingWukong-part3-parsing.textile
170
163
  - docpages/_config.yml
171
164
  - docpages/avro/avro_notes.textile
165
+ - docpages/avro/performance.textile
172
166
  - docpages/avro/tethering.textile
173
167
  - docpages/bigdata-tips.textile
174
168
  - docpages/code/api_response_example.txt
@@ -223,6 +217,21 @@ files:
223
217
  - docpages/wutils.textile
224
218
  - examples/README.txt
225
219
  - examples/binning_percentile_estimator.rb
220
+ - examples/cassandra_streaming/avromapper.rb
221
+ - examples/cassandra_streaming/berlitz_for_cassandra.textile
222
+ - examples/cassandra_streaming/cassandra.avpr
223
+ - examples/cassandra_streaming/cassandra_random_partitioner.rb
224
+ - examples/cassandra_streaming/catter.sh
225
+ - examples/cassandra_streaming/client_interface_notes.textile
226
+ - examples/cassandra_streaming/client_schema.avpr
227
+ - examples/cassandra_streaming/client_schema.textile
228
+ - examples/cassandra_streaming/foofile.avr
229
+ - examples/cassandra_streaming/pymap.sh
230
+ - examples/cassandra_streaming/pyreduce.sh
231
+ - examples/cassandra_streaming/smutation.avpr
232
+ - examples/cassandra_streaming/streamer.sh
233
+ - examples/cassandra_streaming/struct_loader.rb
234
+ - examples/cassandra_streaming/tuning.textile
226
235
  - examples/contrib/jeans/README.markdown
227
236
  - examples/contrib/jeans/data/normalized_sizes
228
237
  - examples/contrib/jeans/data/orders.tsv
@@ -232,8 +241,11 @@ files:
232
241
  - examples/corpus/words_to_bigrams.rb
233
242
  - examples/count_keys.rb
234
243
  - examples/count_keys_at_mapper.rb
244
+ - examples/emr/README-elastic_map_reduce.textile
245
+ - examples/emr/dot_wukong_dir/credentials.json
246
+ - examples/emr/dot_wukong_dir/emr.yaml
247
+ - examples/emr/dot_wukong_dir/emr_bootstrap.sh
235
248
  - examples/emr/elastic_mapreduce_example.rb
236
- - examples/emr/emr.yaml
237
249
  - examples/keystore/cassandra_batch_test.rb
238
250
  - examples/keystore/conditional_outputter_example.rb
239
251
  - examples/network_graph/adjacency_list.rb
@@ -265,6 +277,7 @@ files:
265
277
  - lib/wukong/datatypes/fake_types.rb
266
278
  - lib/wukong/dfs.rb
267
279
  - lib/wukong/encoding.rb
280
+ - lib/wukong/encoding/asciize.rb
268
281
  - lib/wukong/extensions.rb
269
282
  - lib/wukong/extensions/array.rb
270
283
  - lib/wukong/extensions/blank.rb
@@ -297,11 +310,16 @@ files:
297
310
  - lib/wukong/schema.rb
298
311
  - lib/wukong/script.rb
299
312
  - lib/wukong/script/avro_command.rb
313
+ - lib/wukong/script/cassandra_loader_script.rb
300
314
  - lib/wukong/script/emr_command.rb
301
315
  - lib/wukong/script/hadoop_command.rb
302
316
  - lib/wukong/script/local_command.rb
303
317
  - lib/wukong/store.rb
304
318
  - lib/wukong/store/base.rb
319
+ - lib/wukong/store/cassandra.rb
320
+ - lib/wukong/store/cassandra/streaming.rb
321
+ - lib/wukong/store/cassandra/struct_loader.rb
322
+ - lib/wukong/store/cassandra_model.rb
305
323
  - lib/wukong/store/chh_chunked_flat_file_store.rb
306
324
  - lib/wukong/store/chunked_flat_file_store.rb
307
325
  - lib/wukong/store/conditional_store.rb
@@ -356,7 +374,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
356
374
  requirements:
357
375
  - - ">="
358
376
  - !ruby/object:Gem::Version
359
- hash: 3
360
377
  segments:
361
378
  - 0
362
379
  version: "0"
@@ -365,7 +382,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
365
382
  requirements:
366
383
  - - ">="
367
384
  - !ruby/object:Gem::Version
368
- hash: 3
369
385
  segments:
370
386
  - 0
371
387
  version: "0"
@@ -381,6 +397,9 @@ test_files:
381
397
  - spec/wukong/encoding_spec.rb
382
398
  - spec/wukong/script_spec.rb
383
399
  - examples/binning_percentile_estimator.rb
400
+ - examples/cassandra_streaming/avromapper.rb
401
+ - examples/cassandra_streaming/cassandra_random_partitioner.rb
402
+ - examples/cassandra_streaming/struct_loader.rb
384
403
  - examples/contrib/jeans/normalize.rb
385
404
  - examples/contrib/jeans/sizes.rb
386
405
  - examples/corpus/words_to_bigrams.rb