wukong 1.5.3 → 1.5.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.textile +4 -0
- data/bin/hdp-bin +44 -0
- data/bin/hdp-ls +2 -1
- data/docpages/avro/performance.textile +36 -0
- data/examples/cassandra_streaming/avromapper.rb +85 -0
- data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
- data/examples/cassandra_streaming/cassandra.avpr +468 -0
- data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
- data/examples/cassandra_streaming/catter.sh +45 -0
- data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
- data/examples/cassandra_streaming/client_schema.avpr +211 -0
- data/examples/cassandra_streaming/client_schema.textile +318 -0
- data/examples/cassandra_streaming/foofile.avr +0 -0
- data/examples/cassandra_streaming/pymap.sh +1 -0
- data/examples/cassandra_streaming/pyreduce.sh +1 -0
- data/examples/cassandra_streaming/smutation.avpr +188 -0
- data/examples/cassandra_streaming/streamer.sh +51 -0
- data/examples/cassandra_streaming/struct_loader.rb +24 -0
- data/examples/cassandra_streaming/tuning.textile +73 -0
- data/examples/emr/README-elastic_map_reduce.textile +26 -0
- data/examples/emr/dot_wukong_dir/credentials.json +7 -0
- data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
- data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
- data/examples/emr/elastic_mapreduce_example.rb +1 -0
- data/lib/wukong/encoding/asciize.rb +108 -0
- data/lib/wukong/extensions/date_time.rb +33 -7
- data/lib/wukong/extensions/emittable.rb +12 -25
- data/lib/wukong/extensions/hash_like.rb +13 -6
- data/lib/wukong/filename_pattern.rb +8 -7
- data/lib/wukong/schema.rb +47 -0
- data/lib/wukong/script.rb +7 -0
- data/lib/wukong/script/cassandra_loader_script.rb +40 -0
- data/lib/wukong/script/emr_command.rb +74 -43
- data/lib/wukong/script/hadoop_command.rb +89 -72
- data/lib/wukong/store.rb +2 -7
- data/lib/wukong/store/cassandra.rb +10 -0
- data/lib/wukong/store/cassandra/streaming.rb +75 -0
- data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
- data/lib/wukong/store/cassandra_model.rb +90 -0
- data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
- data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
- data/wukong.gemspec +32 -4
- metadata +33 -14
data/lib/wukong/store.rb
CHANGED
@@ -1,15 +1,10 @@
|
|
1
1
|
module Wukong
|
2
2
|
module Store
|
3
|
-
# extend FactoryModule
|
4
3
|
autoload :Base, 'wukong/store/base'
|
5
4
|
autoload :FlatFileStore, 'wukong/store/flat_file_store'
|
6
|
-
# autoload :ConditionalStore, 'monkeyshines/store/conditional_store'
|
7
5
|
autoload :ChunkedFlatFileStore, 'wukong/store/chunked_flat_file_store'
|
8
6
|
autoload :ChhChunkedFlatFileStore, 'wukong/store/chh_chunked_flat_file_store'
|
9
|
-
|
10
|
-
|
11
|
-
# autoload :TyrantTdbKeyStore, 'monkeyshines/store/tyrant_tdb_key_store'
|
12
|
-
# autoload :TyrantRdbKeyStore, 'monkeyshines/store/tyrant_rdb_key_store'
|
13
|
-
# autoload :ReadThruStore, 'monkeyshines/store/read_thru_store'
|
7
|
+
|
8
|
+
autoload :CassandraModel, 'wukong/store/cassandra_model'
|
14
9
|
end
|
15
10
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Settings.define :cassandra_hosts, :default => '127.0.0.1:9160', :type => Array, :description => 'Comma-delimited list of hostname:port addresses for the Cassandra database holding Twitter API objects'
|
2
|
+
Settings.define :cassandra_keyspace, :default => 'soc_net_tw', :description => 'Cassandra keyspace for Twitter objects'
|
3
|
+
|
4
|
+
module Wukong
|
5
|
+
module Store
|
6
|
+
module CassandraStore
|
7
|
+
autoload :StructLoader, 'wukong/store/cassandra/struct_loader'
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'avro'
|
2
|
+
|
3
|
+
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
+
module Wukong::Store::CassandraModel
|
5
|
+
|
6
|
+
#
|
7
|
+
# Store model using avro writer
|
8
|
+
#
|
9
|
+
def streaming_save
|
10
|
+
self.class.streaming_insert id, self
|
11
|
+
end
|
12
|
+
module ClassMethods
|
13
|
+
|
14
|
+
def streaming_writer
|
15
|
+
@streaming_writer ||= AvroWriter.new
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# Use avro and stream into cassandra
|
20
|
+
#
|
21
|
+
def streaming_insert id, hsh
|
22
|
+
streaming_writer.put(id.to_s, hsh.to_db_hash)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
class AvroWriter
|
26
|
+
#
|
27
|
+
# Reads in the protocol schema
|
28
|
+
# creates the necessary encoder and writer.
|
29
|
+
#
|
30
|
+
def initialize
|
31
|
+
schema_file = Settings.cassandra_avro_schema
|
32
|
+
@proto = Avro::Protocol.parse(File.read(schema_file))
|
33
|
+
@schema = @proto.types.detect{|schema| schema.name == 'StreamingMutation'}
|
34
|
+
@enc = Avro::IO::BinaryEncoder.new($stdout)
|
35
|
+
# @enc = DummyEncoder.new($stdout)
|
36
|
+
@writer = Avro::IO::DatumWriter.new(@schema)
|
37
|
+
# warn [@schema, @enc].inspect
|
38
|
+
end
|
39
|
+
|
40
|
+
def write key, col_name, value
|
41
|
+
@writer.write(smutation(key, col_name, value), @enc)
|
42
|
+
end
|
43
|
+
|
44
|
+
def write_directly key, col_name, value, timestamp, ttl
|
45
|
+
# Log.info "Insert(row_key => #{key}, col_name => #{col_name}, value => #{value}"
|
46
|
+
@enc.write_bytes(key)
|
47
|
+
@enc.write_bytes(col_name)
|
48
|
+
@enc.write_bytes(value)
|
49
|
+
@enc.write_long(timestamp)
|
50
|
+
@enc.write_int(ttl)
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Iterate through each key value pair in the hash to
|
55
|
+
# be inserted and write directly one at a time
|
56
|
+
#
|
57
|
+
def put id, hsh, timestamp=nil, ttl=0
|
58
|
+
timestamp ||= Time.now.to_i
|
59
|
+
hsh.each do |attr, val|
|
60
|
+
write_directly(id, attr, val, timestamp, ttl)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def smutation key, name, value
|
65
|
+
{
|
66
|
+
'key' => key,
|
67
|
+
'name' => name.to_s,
|
68
|
+
'value' => value.to_s,
|
69
|
+
'timestamp' => Time.epoch_microseconds,
|
70
|
+
'ttl' => 0
|
71
|
+
}
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'avro'
|
2
|
+
|
3
|
+
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
+
|
5
|
+
module Wukong::Store::Cassandra
|
6
|
+
class StructLoader < Wukong::Streamer::StructStreamer
|
7
|
+
def initialize *args
|
8
|
+
super(*args)
|
9
|
+
@log = PeriodicMonitor.new
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# Blindly expects objects streaming by to have a "streaming_save" method
|
14
|
+
#
|
15
|
+
def process object, *_
|
16
|
+
# object.save
|
17
|
+
object.streaming_save
|
18
|
+
@log.periodically(object.to_flat)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Store
|
3
|
+
#
|
4
|
+
# Barebones interface between a wukong class and a cassandra database
|
5
|
+
#
|
6
|
+
# Class must somehow provide a class-level cassandra_db accessor
|
7
|
+
# that sets the @cassandra_db instance variable.
|
8
|
+
#
|
9
|
+
module CassandraModel
|
10
|
+
#
|
11
|
+
# Store model to the DB
|
12
|
+
#
|
13
|
+
def save
|
14
|
+
self.class.insert key, self.to_db_hash
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# Flatten attributes for storage in the DB.
|
19
|
+
#
|
20
|
+
# * omits elements whose value is nil
|
21
|
+
# * calls to_s on everything else
|
22
|
+
# * This means that blank strings are preserved;
|
23
|
+
# * and that false is saved as 'false'
|
24
|
+
#
|
25
|
+
# Override if you think something fancier than that should happen.
|
26
|
+
#
|
27
|
+
def to_db_hash
|
28
|
+
db_hsh = {}
|
29
|
+
to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
|
30
|
+
db_hsh
|
31
|
+
end
|
32
|
+
|
33
|
+
module ClassMethods
|
34
|
+
# Cassandra column family -- taken from the class name by default.
|
35
|
+
def table_name
|
36
|
+
class_basename
|
37
|
+
end
|
38
|
+
|
39
|
+
# Override to control how your class is instantiated from the DB hash
|
40
|
+
def from_db_hash *args
|
41
|
+
from_hash *args
|
42
|
+
end
|
43
|
+
|
44
|
+
# Insert into the cassandra database
|
45
|
+
# uses object's #to_db_hash method
|
46
|
+
def insert key, *args
|
47
|
+
hsh = args.first
|
48
|
+
cassandra_db.insert(table_name, key.to_s, hsh)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Load from the cassandra database
|
52
|
+
# calls out to object's #from_db_hash method
|
53
|
+
def load key
|
54
|
+
hsh = cassandra_db.get(self.class_basename, key.to_s)
|
55
|
+
from_db_hash(hsh) if hsh
|
56
|
+
end
|
57
|
+
|
58
|
+
# invalidates cassandra connection on errors where that makes sense.
|
59
|
+
def handle_error action, e
|
60
|
+
warn "#{action} failed: #{e} #{e.backtrace.join("\t")}" ;
|
61
|
+
@cassandra_db = nil
|
62
|
+
sleep 0.2
|
63
|
+
end
|
64
|
+
end
|
65
|
+
# The standard 'inject class methods when module is included' trick
|
66
|
+
def self.included base
|
67
|
+
base.class_eval{ extend ClassMethods}
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
Hash.class_eval do
|
75
|
+
#
|
76
|
+
# Flatten attributes for storage in the DB.
|
77
|
+
#
|
78
|
+
# * omits elements whose value is nil
|
79
|
+
# * calls to_s on everything else
|
80
|
+
# * This means that blank strings are preserved;
|
81
|
+
# * and that false is saved as 'false'
|
82
|
+
#
|
83
|
+
# Override if you think something fancier than that should happen.
|
84
|
+
#
|
85
|
+
def to_db_hash
|
86
|
+
db_hsh = {}
|
87
|
+
to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
|
88
|
+
db_hsh
|
89
|
+
end
|
90
|
+
end
|
@@ -12,7 +12,7 @@ module Wukong
|
|
12
12
|
def initialize options={}
|
13
13
|
# super wants a :filename in the options or it will fail. We need to get the initial filename
|
14
14
|
# set up before we call super, so we need all of the parts of the pattern set up.
|
15
|
-
self.rootdir = options[:rootdir]
|
15
|
+
self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
|
16
16
|
self.handle = options[:handle]
|
17
17
|
pattern = options[:pattern] || Settings[:chunk_file_pattern]
|
18
18
|
self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
|
@@ -1,41 +1,45 @@
|
|
1
|
+
require 'wukong/monitor/periodic_monitor'
|
1
2
|
module Wukong
|
2
3
|
module Store
|
3
4
|
class ChunkedFlatFileStore < Wukong::Store::FlatFileStore
|
4
5
|
attr_accessor :filename_pattern, :chunk_monitor, :handle, :chunktime, :rootdir
|
5
6
|
|
6
7
|
# Move to configliere
|
7
|
-
Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle
|
8
|
-
Settings.define :
|
9
|
-
Settings.define :chunk_file_rootdir, :default =>
|
10
|
-
|
8
|
+
Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle-:timestamp-:pid.tsv",:description => "The pattern for chunked files."
|
9
|
+
Settings.define :chunk_file_interval, :default => 4*60*60, :description => "The time interval to keep a chunk file open."
|
10
|
+
Settings.define :chunk_file_rootdir, :default => '/tmp', :description => "The root directory for the chunked files."
|
11
|
+
|
11
12
|
#Note that filemode is inherited from flat_file
|
12
13
|
|
13
14
|
def initialize options={}
|
14
15
|
# super wants a :filename in the options or it will fail. We need to get the initial filename
|
15
|
-
# set up before we call super, so we need all of the parts of the pattern set up.
|
16
|
-
self.chunktime = options[:
|
17
|
-
self.rootdir = options[:rootdir]
|
18
|
-
self.handle = options[:handle]
|
19
|
-
pattern = options[:pattern]
|
16
|
+
# set up before we call super, so we need all of the parts of the pattern set up.
|
17
|
+
self.chunktime = options[:interval] || Settings[:chunk_file_interval]
|
18
|
+
self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
|
19
|
+
self.handle = options[:handle]
|
20
|
+
pattern = options[:pattern] || Settings[:chunk_file_pattern]
|
20
21
|
self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
|
21
|
-
options[:filename] = filename_pattern.make()
|
22
|
-
|
22
|
+
options[:filename] = filename_pattern.make()
|
23
|
+
options[:filemode] ||= 'a'
|
24
|
+
Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
|
25
|
+
self.chunk_monitor = Wukong::Monitor::PeriodicMonitor.new( :time => self.chunktime )
|
26
|
+
|
23
27
|
super options
|
28
|
+
self.mkdir!
|
29
|
+
end
|
24
30
|
|
25
|
-
|
26
|
-
|
31
|
+
def new_chunk!
|
32
|
+
new_filename = filename_pattern.make()
|
33
|
+
Log.info "Rotating chunked file #{filename} into #{new_filename}"
|
34
|
+
self.flush
|
35
|
+
self.close
|
36
|
+
@filename = new_filename
|
27
37
|
self.mkdir!
|
28
38
|
end
|
29
39
|
|
30
40
|
def save *args
|
31
41
|
result = super *args
|
32
|
-
chunk_monitor.periodically
|
33
|
-
new_filename = filename_pattern.make()
|
34
|
-
Log.info "Rotating chunked file #{filename} into #{new_filename}"
|
35
|
-
self.close
|
36
|
-
@filename = new_filename
|
37
|
-
self.mkdir!
|
38
|
-
end
|
42
|
+
chunk_monitor.periodically{ new_chunk! }
|
39
43
|
result
|
40
44
|
end
|
41
45
|
|
data/wukong.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{wukong}
|
8
|
-
s.version = "1.5.3"
|
8
|
+
s.version = "1.5.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-11-02}
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
14
14
|
|
15
15
|
* stream of lines when it's efficient to process by lines
|
@@ -30,10 +30,10 @@ Gem::Specification.new do |s|
|
|
30
30
|
"LICENSE.textile",
|
31
31
|
"README.textile",
|
32
32
|
"TODO.textile",
|
33
|
-
"bin/bootstrap.sh",
|
34
33
|
"bin/cutc",
|
35
34
|
"bin/cuttab",
|
36
35
|
"bin/greptrue",
|
36
|
+
"bin/hdp-bin",
|
37
37
|
"bin/hdp-bzip",
|
38
38
|
"bin/hdp-cat",
|
39
39
|
"bin/hdp-catd",
|
@@ -75,6 +75,7 @@ Gem::Specification.new do |s|
|
|
75
75
|
"docpages/UsingWukong-part3-parsing.textile",
|
76
76
|
"docpages/_config.yml",
|
77
77
|
"docpages/avro/avro_notes.textile",
|
78
|
+
"docpages/avro/performance.textile",
|
78
79
|
"docpages/avro/tethering.textile",
|
79
80
|
"docpages/bigdata-tips.textile",
|
80
81
|
"docpages/code/api_response_example.txt",
|
@@ -129,6 +130,21 @@ Gem::Specification.new do |s|
|
|
129
130
|
"docpages/wutils.textile",
|
130
131
|
"examples/README.txt",
|
131
132
|
"examples/binning_percentile_estimator.rb",
|
133
|
+
"examples/cassandra_streaming/avromapper.rb",
|
134
|
+
"examples/cassandra_streaming/berlitz_for_cassandra.textile",
|
135
|
+
"examples/cassandra_streaming/cassandra.avpr",
|
136
|
+
"examples/cassandra_streaming/cassandra_random_partitioner.rb",
|
137
|
+
"examples/cassandra_streaming/catter.sh",
|
138
|
+
"examples/cassandra_streaming/client_interface_notes.textile",
|
139
|
+
"examples/cassandra_streaming/client_schema.avpr",
|
140
|
+
"examples/cassandra_streaming/client_schema.textile",
|
141
|
+
"examples/cassandra_streaming/foofile.avr",
|
142
|
+
"examples/cassandra_streaming/pymap.sh",
|
143
|
+
"examples/cassandra_streaming/pyreduce.sh",
|
144
|
+
"examples/cassandra_streaming/smutation.avpr",
|
145
|
+
"examples/cassandra_streaming/streamer.sh",
|
146
|
+
"examples/cassandra_streaming/struct_loader.rb",
|
147
|
+
"examples/cassandra_streaming/tuning.textile",
|
132
148
|
"examples/contrib/jeans/README.markdown",
|
133
149
|
"examples/contrib/jeans/data/normalized_sizes",
|
134
150
|
"examples/contrib/jeans/data/orders.tsv",
|
@@ -138,8 +154,11 @@ Gem::Specification.new do |s|
|
|
138
154
|
"examples/corpus/words_to_bigrams.rb",
|
139
155
|
"examples/count_keys.rb",
|
140
156
|
"examples/count_keys_at_mapper.rb",
|
157
|
+
"examples/emr/README-elastic_map_reduce.textile",
|
158
|
+
"examples/emr/dot_wukong_dir/credentials.json",
|
159
|
+
"examples/emr/dot_wukong_dir/emr.yaml",
|
160
|
+
"examples/emr/dot_wukong_dir/emr_bootstrap.sh",
|
141
161
|
"examples/emr/elastic_mapreduce_example.rb",
|
142
|
-
"examples/emr/emr.yaml",
|
143
162
|
"examples/keystore/cassandra_batch_test.rb",
|
144
163
|
"examples/keystore/conditional_outputter_example.rb",
|
145
164
|
"examples/network_graph/adjacency_list.rb",
|
@@ -171,6 +190,7 @@ Gem::Specification.new do |s|
|
|
171
190
|
"lib/wukong/datatypes/fake_types.rb",
|
172
191
|
"lib/wukong/dfs.rb",
|
173
192
|
"lib/wukong/encoding.rb",
|
193
|
+
"lib/wukong/encoding/asciize.rb",
|
174
194
|
"lib/wukong/extensions.rb",
|
175
195
|
"lib/wukong/extensions/array.rb",
|
176
196
|
"lib/wukong/extensions/blank.rb",
|
@@ -203,11 +223,16 @@ Gem::Specification.new do |s|
|
|
203
223
|
"lib/wukong/schema.rb",
|
204
224
|
"lib/wukong/script.rb",
|
205
225
|
"lib/wukong/script/avro_command.rb",
|
226
|
+
"lib/wukong/script/cassandra_loader_script.rb",
|
206
227
|
"lib/wukong/script/emr_command.rb",
|
207
228
|
"lib/wukong/script/hadoop_command.rb",
|
208
229
|
"lib/wukong/script/local_command.rb",
|
209
230
|
"lib/wukong/store.rb",
|
210
231
|
"lib/wukong/store/base.rb",
|
232
|
+
"lib/wukong/store/cassandra.rb",
|
233
|
+
"lib/wukong/store/cassandra/streaming.rb",
|
234
|
+
"lib/wukong/store/cassandra/struct_loader.rb",
|
235
|
+
"lib/wukong/store/cassandra_model.rb",
|
211
236
|
"lib/wukong/store/chh_chunked_flat_file_store.rb",
|
212
237
|
"lib/wukong/store/chunked_flat_file_store.rb",
|
213
238
|
"lib/wukong/store/conditional_store.rb",
|
@@ -259,6 +284,9 @@ Gem::Specification.new do |s|
|
|
259
284
|
"spec/wukong/encoding_spec.rb",
|
260
285
|
"spec/wukong/script_spec.rb",
|
261
286
|
"examples/binning_percentile_estimator.rb",
|
287
|
+
"examples/cassandra_streaming/avromapper.rb",
|
288
|
+
"examples/cassandra_streaming/cassandra_random_partitioner.rb",
|
289
|
+
"examples/cassandra_streaming/struct_loader.rb",
|
262
290
|
"examples/contrib/jeans/normalize.rb",
|
263
291
|
"examples/contrib/jeans/sizes.rb",
|
264
292
|
"examples/corpus/words_to_bigrams.rb",
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 5
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 1
|
8
7
|
- 5
|
9
|
-
- 3
|
10
|
-
version: 1.5.3
|
8
|
+
- 4
|
9
|
+
version: 1.5.4
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Philip (flip) Kromer
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2010-
|
17
|
+
date: 2010-11-02 00:00:00 -05:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 13
|
30
28
|
segments:
|
31
29
|
- 1
|
32
30
|
- 2
|
@@ -42,7 +40,6 @@ dependencies:
|
|
42
40
|
requirements:
|
43
41
|
- - ">="
|
44
42
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 3
|
46
43
|
segments:
|
47
44
|
- 0
|
48
45
|
version: "0"
|
@@ -56,7 +53,6 @@ dependencies:
|
|
56
53
|
requirements:
|
57
54
|
- - ">="
|
58
55
|
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
56
|
segments:
|
61
57
|
- 0
|
62
58
|
version: "0"
|
@@ -70,7 +66,6 @@ dependencies:
|
|
70
66
|
requirements:
|
71
67
|
- - ">="
|
72
68
|
- !ruby/object:Gem::Version
|
73
|
-
hash: 3
|
74
69
|
segments:
|
75
70
|
- 0
|
76
71
|
version: "0"
|
@@ -84,7 +79,6 @@ dependencies:
|
|
84
79
|
requirements:
|
85
80
|
- - ">="
|
86
81
|
- !ruby/object:Gem::Version
|
87
|
-
hash: 3
|
88
82
|
segments:
|
89
83
|
- 0
|
90
84
|
version: "0"
|
@@ -98,7 +92,6 @@ dependencies:
|
|
98
92
|
requirements:
|
99
93
|
- - ">="
|
100
94
|
- !ruby/object:Gem::Version
|
101
|
-
hash: 3
|
102
95
|
segments:
|
103
96
|
- 0
|
104
97
|
version: "0"
|
@@ -124,10 +117,10 @@ files:
|
|
124
117
|
- LICENSE.textile
|
125
118
|
- README.textile
|
126
119
|
- TODO.textile
|
127
|
-
- bin/bootstrap.sh
|
128
120
|
- bin/cutc
|
129
121
|
- bin/cuttab
|
130
122
|
- bin/greptrue
|
123
|
+
- bin/hdp-bin
|
131
124
|
- bin/hdp-bzip
|
132
125
|
- bin/hdp-cat
|
133
126
|
- bin/hdp-catd
|
@@ -169,6 +162,7 @@ files:
|
|
169
162
|
- docpages/UsingWukong-part3-parsing.textile
|
170
163
|
- docpages/_config.yml
|
171
164
|
- docpages/avro/avro_notes.textile
|
165
|
+
- docpages/avro/performance.textile
|
172
166
|
- docpages/avro/tethering.textile
|
173
167
|
- docpages/bigdata-tips.textile
|
174
168
|
- docpages/code/api_response_example.txt
|
@@ -223,6 +217,21 @@ files:
|
|
223
217
|
- docpages/wutils.textile
|
224
218
|
- examples/README.txt
|
225
219
|
- examples/binning_percentile_estimator.rb
|
220
|
+
- examples/cassandra_streaming/avromapper.rb
|
221
|
+
- examples/cassandra_streaming/berlitz_for_cassandra.textile
|
222
|
+
- examples/cassandra_streaming/cassandra.avpr
|
223
|
+
- examples/cassandra_streaming/cassandra_random_partitioner.rb
|
224
|
+
- examples/cassandra_streaming/catter.sh
|
225
|
+
- examples/cassandra_streaming/client_interface_notes.textile
|
226
|
+
- examples/cassandra_streaming/client_schema.avpr
|
227
|
+
- examples/cassandra_streaming/client_schema.textile
|
228
|
+
- examples/cassandra_streaming/foofile.avr
|
229
|
+
- examples/cassandra_streaming/pymap.sh
|
230
|
+
- examples/cassandra_streaming/pyreduce.sh
|
231
|
+
- examples/cassandra_streaming/smutation.avpr
|
232
|
+
- examples/cassandra_streaming/streamer.sh
|
233
|
+
- examples/cassandra_streaming/struct_loader.rb
|
234
|
+
- examples/cassandra_streaming/tuning.textile
|
226
235
|
- examples/contrib/jeans/README.markdown
|
227
236
|
- examples/contrib/jeans/data/normalized_sizes
|
228
237
|
- examples/contrib/jeans/data/orders.tsv
|
@@ -232,8 +241,11 @@ files:
|
|
232
241
|
- examples/corpus/words_to_bigrams.rb
|
233
242
|
- examples/count_keys.rb
|
234
243
|
- examples/count_keys_at_mapper.rb
|
244
|
+
- examples/emr/README-elastic_map_reduce.textile
|
245
|
+
- examples/emr/dot_wukong_dir/credentials.json
|
246
|
+
- examples/emr/dot_wukong_dir/emr.yaml
|
247
|
+
- examples/emr/dot_wukong_dir/emr_bootstrap.sh
|
235
248
|
- examples/emr/elastic_mapreduce_example.rb
|
236
|
-
- examples/emr/emr.yaml
|
237
249
|
- examples/keystore/cassandra_batch_test.rb
|
238
250
|
- examples/keystore/conditional_outputter_example.rb
|
239
251
|
- examples/network_graph/adjacency_list.rb
|
@@ -265,6 +277,7 @@ files:
|
|
265
277
|
- lib/wukong/datatypes/fake_types.rb
|
266
278
|
- lib/wukong/dfs.rb
|
267
279
|
- lib/wukong/encoding.rb
|
280
|
+
- lib/wukong/encoding/asciize.rb
|
268
281
|
- lib/wukong/extensions.rb
|
269
282
|
- lib/wukong/extensions/array.rb
|
270
283
|
- lib/wukong/extensions/blank.rb
|
@@ -297,11 +310,16 @@ files:
|
|
297
310
|
- lib/wukong/schema.rb
|
298
311
|
- lib/wukong/script.rb
|
299
312
|
- lib/wukong/script/avro_command.rb
|
313
|
+
- lib/wukong/script/cassandra_loader_script.rb
|
300
314
|
- lib/wukong/script/emr_command.rb
|
301
315
|
- lib/wukong/script/hadoop_command.rb
|
302
316
|
- lib/wukong/script/local_command.rb
|
303
317
|
- lib/wukong/store.rb
|
304
318
|
- lib/wukong/store/base.rb
|
319
|
+
- lib/wukong/store/cassandra.rb
|
320
|
+
- lib/wukong/store/cassandra/streaming.rb
|
321
|
+
- lib/wukong/store/cassandra/struct_loader.rb
|
322
|
+
- lib/wukong/store/cassandra_model.rb
|
305
323
|
- lib/wukong/store/chh_chunked_flat_file_store.rb
|
306
324
|
- lib/wukong/store/chunked_flat_file_store.rb
|
307
325
|
- lib/wukong/store/conditional_store.rb
|
@@ -356,7 +374,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
356
374
|
requirements:
|
357
375
|
- - ">="
|
358
376
|
- !ruby/object:Gem::Version
|
359
|
-
hash: 3
|
360
377
|
segments:
|
361
378
|
- 0
|
362
379
|
version: "0"
|
@@ -365,7 +382,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
365
382
|
requirements:
|
366
383
|
- - ">="
|
367
384
|
- !ruby/object:Gem::Version
|
368
|
-
hash: 3
|
369
385
|
segments:
|
370
386
|
- 0
|
371
387
|
version: "0"
|
@@ -381,6 +397,9 @@ test_files:
|
|
381
397
|
- spec/wukong/encoding_spec.rb
|
382
398
|
- spec/wukong/script_spec.rb
|
383
399
|
- examples/binning_percentile_estimator.rb
|
400
|
+
- examples/cassandra_streaming/avromapper.rb
|
401
|
+
- examples/cassandra_streaming/cassandra_random_partitioner.rb
|
402
|
+
- examples/cassandra_streaming/struct_loader.rb
|
384
403
|
- examples/contrib/jeans/normalize.rb
|
385
404
|
- examples/contrib/jeans/sizes.rb
|
386
405
|
- examples/corpus/words_to_bigrams.rb
|