wukong 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.textile +4 -0
- data/bin/hdp-bin +44 -0
- data/bin/hdp-ls +2 -1
- data/docpages/avro/performance.textile +36 -0
- data/examples/cassandra_streaming/avromapper.rb +85 -0
- data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
- data/examples/cassandra_streaming/cassandra.avpr +468 -0
- data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
- data/examples/cassandra_streaming/catter.sh +45 -0
- data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
- data/examples/cassandra_streaming/client_schema.avpr +211 -0
- data/examples/cassandra_streaming/client_schema.textile +318 -0
- data/examples/cassandra_streaming/foofile.avr +0 -0
- data/examples/cassandra_streaming/pymap.sh +1 -0
- data/examples/cassandra_streaming/pyreduce.sh +1 -0
- data/examples/cassandra_streaming/smutation.avpr +188 -0
- data/examples/cassandra_streaming/streamer.sh +51 -0
- data/examples/cassandra_streaming/struct_loader.rb +24 -0
- data/examples/cassandra_streaming/tuning.textile +73 -0
- data/examples/emr/README-elastic_map_reduce.textile +26 -0
- data/examples/emr/dot_wukong_dir/credentials.json +7 -0
- data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
- data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
- data/examples/emr/elastic_mapreduce_example.rb +1 -0
- data/lib/wukong/encoding/asciize.rb +108 -0
- data/lib/wukong/extensions/date_time.rb +33 -7
- data/lib/wukong/extensions/emittable.rb +12 -25
- data/lib/wukong/extensions/hash_like.rb +13 -6
- data/lib/wukong/filename_pattern.rb +8 -7
- data/lib/wukong/schema.rb +47 -0
- data/lib/wukong/script.rb +7 -0
- data/lib/wukong/script/cassandra_loader_script.rb +40 -0
- data/lib/wukong/script/emr_command.rb +74 -43
- data/lib/wukong/script/hadoop_command.rb +89 -72
- data/lib/wukong/store.rb +2 -7
- data/lib/wukong/store/cassandra.rb +10 -0
- data/lib/wukong/store/cassandra/streaming.rb +75 -0
- data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
- data/lib/wukong/store/cassandra_model.rb +90 -0
- data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
- data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
- data/wukong.gemspec +32 -4
- metadata +33 -14
data/lib/wukong/store.rb
CHANGED
@@ -1,15 +1,10 @@
|
|
1
1
|
module Wukong
|
2
2
|
module Store
|
3
|
-
# extend FactoryModule
|
4
3
|
autoload :Base, 'wukong/store/base'
|
5
4
|
autoload :FlatFileStore, 'wukong/store/flat_file_store'
|
6
|
-
# autoload :ConditionalStore, 'monkeyshines/store/conditional_store'
|
7
5
|
autoload :ChunkedFlatFileStore, 'wukong/store/chunked_flat_file_store'
|
8
6
|
autoload :ChhChunkedFlatFileStore, 'wukong/store/chh_chunked_flat_file_store'
|
9
|
-
|
10
|
-
|
11
|
-
# autoload :TyrantTdbKeyStore, 'monkeyshines/store/tyrant_tdb_key_store'
|
12
|
-
# autoload :TyrantRdbKeyStore, 'monkeyshines/store/tyrant_rdb_key_store'
|
13
|
-
# autoload :ReadThruStore, 'monkeyshines/store/read_thru_store'
|
7
|
+
|
8
|
+
autoload :CassandraModel, 'wukong/store/cassandra_model'
|
14
9
|
end
|
15
10
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Settings.define :cassandra_hosts, :default => '127.0.0.1:9160', :type => Array, :description => 'Comma-delimited list of hostname:port addresses for the Cassandra database holding Twitter API objects'
|
2
|
+
Settings.define :cassandra_keyspace, :default => 'soc_net_tw', :description => 'Cassandra keyspace for Twitter objects'
|
3
|
+
|
4
|
+
module Wukong
|
5
|
+
module Store
|
6
|
+
module CassandraStore
|
7
|
+
autoload :StructLoader, 'wukong/store/cassandra/struct_loader'
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'avro'
|
2
|
+
|
3
|
+
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
+
module Wukong::Store::CassandraModel
|
5
|
+
|
6
|
+
#
|
7
|
+
# Store model using avro writer
|
8
|
+
#
|
9
|
+
def streaming_save
|
10
|
+
self.class.streaming_insert id, self
|
11
|
+
end
|
12
|
+
module ClassMethods
|
13
|
+
|
14
|
+
def streaming_writer
|
15
|
+
@streaming_writer ||= AvroWriter.new
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# Use avro and stream into cassandra
|
20
|
+
#
|
21
|
+
def streaming_insert id, hsh
|
22
|
+
streaming_writer.put(id.to_s, hsh.to_db_hash)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
class AvroWriter
|
26
|
+
#
|
27
|
+
# Reads in the protocol schema
|
28
|
+
# creates the necessary encoder and writer.
|
29
|
+
#
|
30
|
+
def initialize
|
31
|
+
schema_file = Settings.cassandra_avro_schema
|
32
|
+
@proto = Avro::Protocol.parse(File.read(schema_file))
|
33
|
+
@schema = @proto.types.detect{|schema| schema.name == 'StreamingMutation'}
|
34
|
+
@enc = Avro::IO::BinaryEncoder.new($stdout)
|
35
|
+
# @enc = DummyEncoder.new($stdout)
|
36
|
+
@writer = Avro::IO::DatumWriter.new(@schema)
|
37
|
+
# warn [@schema, @enc].inspect
|
38
|
+
end
|
39
|
+
|
40
|
+
def write key, col_name, value
|
41
|
+
@writer.write(smutation(key, col_name, value), @enc)
|
42
|
+
end
|
43
|
+
|
44
|
+
def write_directly key, col_name, value, timestamp, ttl
|
45
|
+
# Log.info "Insert(row_key => #{key}, col_name => #{col_name}, value => #{value}"
|
46
|
+
@enc.write_bytes(key)
|
47
|
+
@enc.write_bytes(col_name)
|
48
|
+
@enc.write_bytes(value)
|
49
|
+
@enc.write_long(timestamp)
|
50
|
+
@enc.write_int(ttl)
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Iterate through each key value pair in the hash to
|
55
|
+
# be inserted and write directly one at a time
|
56
|
+
#
|
57
|
+
def put id, hsh, timestamp=nil, ttl=0
|
58
|
+
timestamp ||= Time.epoch_microseconds # default to microseconds, matching #smutation, so both write paths emit comparable timestamps
|
59
|
+
hsh.each do |attr, val|
|
60
|
+
write_directly(id, attr, val, timestamp, ttl)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def smutation key, name, value
|
65
|
+
{
|
66
|
+
'key' => key,
|
67
|
+
'name' => name.to_s,
|
68
|
+
'value' => value.to_s,
|
69
|
+
'timestamp' => Time.epoch_microseconds,
|
70
|
+
'ttl' => 0
|
71
|
+
}
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'avro'
|
2
|
+
|
3
|
+
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
+
|
5
|
+
module Wukong::Store::Cassandra
|
6
|
+
class StructLoader < Wukong::Streamer::StructStreamer
|
7
|
+
def initialize *args
|
8
|
+
super(*args)
|
9
|
+
@log = PeriodicMonitor.new
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# Blindly expects objects streaming by to have a "streaming_save" method
|
14
|
+
#
|
15
|
+
def process object, *_
|
16
|
+
# object.save
|
17
|
+
object.streaming_save
|
18
|
+
@log.periodically(object.to_flat)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Store
|
3
|
+
#
|
4
|
+
# Barebones interface between a wukong class and a cassandra database
|
5
|
+
#
|
6
|
+
# Class must somehow provide a class-level cassandra_db accessor
|
7
|
+
# that sets the @cassandra_db instance variable.
|
8
|
+
#
|
9
|
+
module CassandraModel
|
10
|
+
#
|
11
|
+
# Store model to the DB
|
12
|
+
#
|
13
|
+
def save
|
14
|
+
self.class.insert key, self.to_db_hash
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# Flatten attributes for storage in the DB.
|
19
|
+
#
|
20
|
+
# * omits elements whose value is nil
|
21
|
+
# * calls to_s on everything else
|
22
|
+
# * This means that blank strings are preserved;
|
23
|
+
# * and that false is saved as 'false'
|
24
|
+
#
|
25
|
+
# Override if you think something fancier than that should happen.
|
26
|
+
#
|
27
|
+
def to_db_hash
|
28
|
+
db_hsh = {}
|
29
|
+
to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
|
30
|
+
db_hsh
|
31
|
+
end
|
32
|
+
|
33
|
+
module ClassMethods
|
34
|
+
# Cassandra column family -- taken from the class name by default.
|
35
|
+
def table_name
|
36
|
+
class_basename
|
37
|
+
end
|
38
|
+
|
39
|
+
# Override to control how your class is instantiated from the DB hash
|
40
|
+
def from_db_hash *args
|
41
|
+
from_hash *args
|
42
|
+
end
|
43
|
+
|
44
|
+
# Insert into the cassandra database
|
45
|
+
# uses object's #to_db_hash method
|
46
|
+
def insert key, *args
|
47
|
+
hsh = args.first
|
48
|
+
cassandra_db.insert(table_name, key.to_s, hsh)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Load from the cassandra database
|
52
|
+
# calls out to object's #from_db_hash method
|
53
|
+
def load key
|
54
|
+
hsh = cassandra_db.get(table_name, key.to_s) # read from the same column family that .insert writes to (table_name defaults to class_basename)
|
55
|
+
from_db_hash(hsh) if hsh
|
56
|
+
end
|
57
|
+
|
58
|
+
# invalidates cassandra connection on errors where that makes sense.
|
59
|
+
def handle_error action, e
|
60
|
+
warn "#{action} failed: #{e} #{e.backtrace.join("\t")}" ;
|
61
|
+
@cassandra_db = nil
|
62
|
+
sleep 0.2
|
63
|
+
end
|
64
|
+
end
|
65
|
+
# The standard 'inject class methods when module is included' trick
|
66
|
+
def self.included base
|
67
|
+
base.class_eval{ extend ClassMethods}
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
Hash.class_eval do
|
75
|
+
#
|
76
|
+
# Flatten attributes for storage in the DB.
|
77
|
+
#
|
78
|
+
# * omits elements whose value is nil
|
79
|
+
# * calls to_s on everything else
|
80
|
+
# * This means that blank strings are preserved;
|
81
|
+
# * and that false is saved as 'false'
|
82
|
+
#
|
83
|
+
# Override if you think something fancier than that should happen.
|
84
|
+
#
|
85
|
+
def to_db_hash
|
86
|
+
db_hsh = {}
|
87
|
+
to_hash.each{|k,v| db_hsh[k.to_s] = v.to_s unless v.nil? }
|
88
|
+
db_hsh
|
89
|
+
end
|
90
|
+
end
|
@@ -12,7 +12,7 @@ module Wukong
|
|
12
12
|
def initialize options={}
|
13
13
|
# super wants a :filename in the options or it will fail. We need to get the initial filename
|
14
14
|
# set up before we call super, so we need all of the parts of the pattern set up.
|
15
|
-
self.rootdir = options[:rootdir]
|
15
|
+
self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
|
16
16
|
self.handle = options[:handle]
|
17
17
|
pattern = options[:pattern] || Settings[:chunk_file_pattern]
|
18
18
|
self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
|
@@ -1,41 +1,45 @@
|
|
1
|
+
require 'wukong/monitor/periodic_monitor'
|
1
2
|
module Wukong
|
2
3
|
module Store
|
3
4
|
class ChunkedFlatFileStore < Wukong::Store::FlatFileStore
|
4
5
|
attr_accessor :filename_pattern, :chunk_monitor, :handle, :chunktime, :rootdir
|
5
6
|
|
6
7
|
# Move to configliere
|
7
|
-
Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle
|
8
|
-
Settings.define :
|
9
|
-
Settings.define :chunk_file_rootdir, :default =>
|
10
|
-
|
8
|
+
Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle-:timestamp-:pid.tsv",:description => "The pattern for chunked files."
|
9
|
+
Settings.define :chunk_file_interval, :default => 4*60*60, :description => "The time interval to keep a chunk file open."
|
10
|
+
Settings.define :chunk_file_rootdir, :default => '/tmp', :description => "The root directory for the chunked files."
|
11
|
+
|
11
12
|
#Note that filemode is inherited from flat_file
|
12
13
|
|
13
14
|
def initialize options={}
|
14
15
|
# super wants a :filename in the options or it will fail. We need to get the initial filename
|
15
|
-
# set up before we call super, so we need all of the parts of the pattern set up.
|
16
|
-
self.chunktime = options[:
|
17
|
-
self.rootdir = options[:rootdir]
|
18
|
-
self.handle = options[:handle]
|
19
|
-
pattern = options[:pattern]
|
16
|
+
# set up before we call super, so we need all of the parts of the pattern set up.
|
17
|
+
self.chunktime = options[:interval] || Settings[:chunk_file_interval]
|
18
|
+
self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
|
19
|
+
self.handle = options[:handle]
|
20
|
+
pattern = options[:pattern] || Settings[:chunk_file_pattern]
|
20
21
|
self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => self.rootdir)
|
21
|
-
options[:filename] = filename_pattern.make()
|
22
|
-
|
22
|
+
options[:filename] = filename_pattern.make()
|
23
|
+
options[:filemode] ||= 'a'
|
24
|
+
Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
|
25
|
+
self.chunk_monitor = Wukong::Monitor::PeriodicMonitor.new( :time => self.chunktime )
|
26
|
+
|
23
27
|
super options
|
28
|
+
self.mkdir!
|
29
|
+
end
|
24
30
|
|
25
|
-
|
26
|
-
|
31
|
+
def new_chunk!
|
32
|
+
new_filename = filename_pattern.make()
|
33
|
+
Log.info "Rotating chunked file #{filename} into #{new_filename}"
|
34
|
+
self.flush
|
35
|
+
self.close
|
36
|
+
@filename = new_filename
|
27
37
|
self.mkdir!
|
28
38
|
end
|
29
39
|
|
30
40
|
def save *args
|
31
41
|
result = super *args
|
32
|
-
chunk_monitor.periodically
|
33
|
-
new_filename = filename_pattern.make()
|
34
|
-
Log.info "Rotating chunked file #{filename} into #{new_filename}"
|
35
|
-
self.close
|
36
|
-
@filename = new_filename
|
37
|
-
self.mkdir!
|
38
|
-
end
|
42
|
+
chunk_monitor.periodically{ new_chunk! }
|
39
43
|
result
|
40
44
|
end
|
41
45
|
|
data/wukong.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{wukong}
|
8
|
-
s.version = "1.5.
|
8
|
+
s.version = "1.5.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-11-02}
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
14
14
|
|
15
15
|
* stream of lines when it's efficient to process by lines
|
@@ -30,10 +30,10 @@ Gem::Specification.new do |s|
|
|
30
30
|
"LICENSE.textile",
|
31
31
|
"README.textile",
|
32
32
|
"TODO.textile",
|
33
|
-
"bin/bootstrap.sh",
|
34
33
|
"bin/cutc",
|
35
34
|
"bin/cuttab",
|
36
35
|
"bin/greptrue",
|
36
|
+
"bin/hdp-bin",
|
37
37
|
"bin/hdp-bzip",
|
38
38
|
"bin/hdp-cat",
|
39
39
|
"bin/hdp-catd",
|
@@ -75,6 +75,7 @@ Gem::Specification.new do |s|
|
|
75
75
|
"docpages/UsingWukong-part3-parsing.textile",
|
76
76
|
"docpages/_config.yml",
|
77
77
|
"docpages/avro/avro_notes.textile",
|
78
|
+
"docpages/avro/performance.textile",
|
78
79
|
"docpages/avro/tethering.textile",
|
79
80
|
"docpages/bigdata-tips.textile",
|
80
81
|
"docpages/code/api_response_example.txt",
|
@@ -129,6 +130,21 @@ Gem::Specification.new do |s|
|
|
129
130
|
"docpages/wutils.textile",
|
130
131
|
"examples/README.txt",
|
131
132
|
"examples/binning_percentile_estimator.rb",
|
133
|
+
"examples/cassandra_streaming/avromapper.rb",
|
134
|
+
"examples/cassandra_streaming/berlitz_for_cassandra.textile",
|
135
|
+
"examples/cassandra_streaming/cassandra.avpr",
|
136
|
+
"examples/cassandra_streaming/cassandra_random_partitioner.rb",
|
137
|
+
"examples/cassandra_streaming/catter.sh",
|
138
|
+
"examples/cassandra_streaming/client_interface_notes.textile",
|
139
|
+
"examples/cassandra_streaming/client_schema.avpr",
|
140
|
+
"examples/cassandra_streaming/client_schema.textile",
|
141
|
+
"examples/cassandra_streaming/foofile.avr",
|
142
|
+
"examples/cassandra_streaming/pymap.sh",
|
143
|
+
"examples/cassandra_streaming/pyreduce.sh",
|
144
|
+
"examples/cassandra_streaming/smutation.avpr",
|
145
|
+
"examples/cassandra_streaming/streamer.sh",
|
146
|
+
"examples/cassandra_streaming/struct_loader.rb",
|
147
|
+
"examples/cassandra_streaming/tuning.textile",
|
132
148
|
"examples/contrib/jeans/README.markdown",
|
133
149
|
"examples/contrib/jeans/data/normalized_sizes",
|
134
150
|
"examples/contrib/jeans/data/orders.tsv",
|
@@ -138,8 +154,11 @@ Gem::Specification.new do |s|
|
|
138
154
|
"examples/corpus/words_to_bigrams.rb",
|
139
155
|
"examples/count_keys.rb",
|
140
156
|
"examples/count_keys_at_mapper.rb",
|
157
|
+
"examples/emr/README-elastic_map_reduce.textile",
|
158
|
+
"examples/emr/dot_wukong_dir/credentials.json",
|
159
|
+
"examples/emr/dot_wukong_dir/emr.yaml",
|
160
|
+
"examples/emr/dot_wukong_dir/emr_bootstrap.sh",
|
141
161
|
"examples/emr/elastic_mapreduce_example.rb",
|
142
|
-
"examples/emr/emr.yaml",
|
143
162
|
"examples/keystore/cassandra_batch_test.rb",
|
144
163
|
"examples/keystore/conditional_outputter_example.rb",
|
145
164
|
"examples/network_graph/adjacency_list.rb",
|
@@ -171,6 +190,7 @@ Gem::Specification.new do |s|
|
|
171
190
|
"lib/wukong/datatypes/fake_types.rb",
|
172
191
|
"lib/wukong/dfs.rb",
|
173
192
|
"lib/wukong/encoding.rb",
|
193
|
+
"lib/wukong/encoding/asciize.rb",
|
174
194
|
"lib/wukong/extensions.rb",
|
175
195
|
"lib/wukong/extensions/array.rb",
|
176
196
|
"lib/wukong/extensions/blank.rb",
|
@@ -203,11 +223,16 @@ Gem::Specification.new do |s|
|
|
203
223
|
"lib/wukong/schema.rb",
|
204
224
|
"lib/wukong/script.rb",
|
205
225
|
"lib/wukong/script/avro_command.rb",
|
226
|
+
"lib/wukong/script/cassandra_loader_script.rb",
|
206
227
|
"lib/wukong/script/emr_command.rb",
|
207
228
|
"lib/wukong/script/hadoop_command.rb",
|
208
229
|
"lib/wukong/script/local_command.rb",
|
209
230
|
"lib/wukong/store.rb",
|
210
231
|
"lib/wukong/store/base.rb",
|
232
|
+
"lib/wukong/store/cassandra.rb",
|
233
|
+
"lib/wukong/store/cassandra/streaming.rb",
|
234
|
+
"lib/wukong/store/cassandra/struct_loader.rb",
|
235
|
+
"lib/wukong/store/cassandra_model.rb",
|
211
236
|
"lib/wukong/store/chh_chunked_flat_file_store.rb",
|
212
237
|
"lib/wukong/store/chunked_flat_file_store.rb",
|
213
238
|
"lib/wukong/store/conditional_store.rb",
|
@@ -259,6 +284,9 @@ Gem::Specification.new do |s|
|
|
259
284
|
"spec/wukong/encoding_spec.rb",
|
260
285
|
"spec/wukong/script_spec.rb",
|
261
286
|
"examples/binning_percentile_estimator.rb",
|
287
|
+
"examples/cassandra_streaming/avromapper.rb",
|
288
|
+
"examples/cassandra_streaming/cassandra_random_partitioner.rb",
|
289
|
+
"examples/cassandra_streaming/struct_loader.rb",
|
262
290
|
"examples/contrib/jeans/normalize.rb",
|
263
291
|
"examples/contrib/jeans/sizes.rb",
|
264
292
|
"examples/corpus/words_to_bigrams.rb",
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 5
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 1
|
8
7
|
- 5
|
9
|
-
-
|
10
|
-
version: 1.5.
|
8
|
+
- 4
|
9
|
+
version: 1.5.4
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Philip (flip) Kromer
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2010-
|
17
|
+
date: 2010-11-02 00:00:00 -05:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 13
|
30
28
|
segments:
|
31
29
|
- 1
|
32
30
|
- 2
|
@@ -42,7 +40,6 @@ dependencies:
|
|
42
40
|
requirements:
|
43
41
|
- - ">="
|
44
42
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 3
|
46
43
|
segments:
|
47
44
|
- 0
|
48
45
|
version: "0"
|
@@ -56,7 +53,6 @@ dependencies:
|
|
56
53
|
requirements:
|
57
54
|
- - ">="
|
58
55
|
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
56
|
segments:
|
61
57
|
- 0
|
62
58
|
version: "0"
|
@@ -70,7 +66,6 @@ dependencies:
|
|
70
66
|
requirements:
|
71
67
|
- - ">="
|
72
68
|
- !ruby/object:Gem::Version
|
73
|
-
hash: 3
|
74
69
|
segments:
|
75
70
|
- 0
|
76
71
|
version: "0"
|
@@ -84,7 +79,6 @@ dependencies:
|
|
84
79
|
requirements:
|
85
80
|
- - ">="
|
86
81
|
- !ruby/object:Gem::Version
|
87
|
-
hash: 3
|
88
82
|
segments:
|
89
83
|
- 0
|
90
84
|
version: "0"
|
@@ -98,7 +92,6 @@ dependencies:
|
|
98
92
|
requirements:
|
99
93
|
- - ">="
|
100
94
|
- !ruby/object:Gem::Version
|
101
|
-
hash: 3
|
102
95
|
segments:
|
103
96
|
- 0
|
104
97
|
version: "0"
|
@@ -124,10 +117,10 @@ files:
|
|
124
117
|
- LICENSE.textile
|
125
118
|
- README.textile
|
126
119
|
- TODO.textile
|
127
|
-
- bin/bootstrap.sh
|
128
120
|
- bin/cutc
|
129
121
|
- bin/cuttab
|
130
122
|
- bin/greptrue
|
123
|
+
- bin/hdp-bin
|
131
124
|
- bin/hdp-bzip
|
132
125
|
- bin/hdp-cat
|
133
126
|
- bin/hdp-catd
|
@@ -169,6 +162,7 @@ files:
|
|
169
162
|
- docpages/UsingWukong-part3-parsing.textile
|
170
163
|
- docpages/_config.yml
|
171
164
|
- docpages/avro/avro_notes.textile
|
165
|
+
- docpages/avro/performance.textile
|
172
166
|
- docpages/avro/tethering.textile
|
173
167
|
- docpages/bigdata-tips.textile
|
174
168
|
- docpages/code/api_response_example.txt
|
@@ -223,6 +217,21 @@ files:
|
|
223
217
|
- docpages/wutils.textile
|
224
218
|
- examples/README.txt
|
225
219
|
- examples/binning_percentile_estimator.rb
|
220
|
+
- examples/cassandra_streaming/avromapper.rb
|
221
|
+
- examples/cassandra_streaming/berlitz_for_cassandra.textile
|
222
|
+
- examples/cassandra_streaming/cassandra.avpr
|
223
|
+
- examples/cassandra_streaming/cassandra_random_partitioner.rb
|
224
|
+
- examples/cassandra_streaming/catter.sh
|
225
|
+
- examples/cassandra_streaming/client_interface_notes.textile
|
226
|
+
- examples/cassandra_streaming/client_schema.avpr
|
227
|
+
- examples/cassandra_streaming/client_schema.textile
|
228
|
+
- examples/cassandra_streaming/foofile.avr
|
229
|
+
- examples/cassandra_streaming/pymap.sh
|
230
|
+
- examples/cassandra_streaming/pyreduce.sh
|
231
|
+
- examples/cassandra_streaming/smutation.avpr
|
232
|
+
- examples/cassandra_streaming/streamer.sh
|
233
|
+
- examples/cassandra_streaming/struct_loader.rb
|
234
|
+
- examples/cassandra_streaming/tuning.textile
|
226
235
|
- examples/contrib/jeans/README.markdown
|
227
236
|
- examples/contrib/jeans/data/normalized_sizes
|
228
237
|
- examples/contrib/jeans/data/orders.tsv
|
@@ -232,8 +241,11 @@ files:
|
|
232
241
|
- examples/corpus/words_to_bigrams.rb
|
233
242
|
- examples/count_keys.rb
|
234
243
|
- examples/count_keys_at_mapper.rb
|
244
|
+
- examples/emr/README-elastic_map_reduce.textile
|
245
|
+
- examples/emr/dot_wukong_dir/credentials.json
|
246
|
+
- examples/emr/dot_wukong_dir/emr.yaml
|
247
|
+
- examples/emr/dot_wukong_dir/emr_bootstrap.sh
|
235
248
|
- examples/emr/elastic_mapreduce_example.rb
|
236
|
-
- examples/emr/emr.yaml
|
237
249
|
- examples/keystore/cassandra_batch_test.rb
|
238
250
|
- examples/keystore/conditional_outputter_example.rb
|
239
251
|
- examples/network_graph/adjacency_list.rb
|
@@ -265,6 +277,7 @@ files:
|
|
265
277
|
- lib/wukong/datatypes/fake_types.rb
|
266
278
|
- lib/wukong/dfs.rb
|
267
279
|
- lib/wukong/encoding.rb
|
280
|
+
- lib/wukong/encoding/asciize.rb
|
268
281
|
- lib/wukong/extensions.rb
|
269
282
|
- lib/wukong/extensions/array.rb
|
270
283
|
- lib/wukong/extensions/blank.rb
|
@@ -297,11 +310,16 @@ files:
|
|
297
310
|
- lib/wukong/schema.rb
|
298
311
|
- lib/wukong/script.rb
|
299
312
|
- lib/wukong/script/avro_command.rb
|
313
|
+
- lib/wukong/script/cassandra_loader_script.rb
|
300
314
|
- lib/wukong/script/emr_command.rb
|
301
315
|
- lib/wukong/script/hadoop_command.rb
|
302
316
|
- lib/wukong/script/local_command.rb
|
303
317
|
- lib/wukong/store.rb
|
304
318
|
- lib/wukong/store/base.rb
|
319
|
+
- lib/wukong/store/cassandra.rb
|
320
|
+
- lib/wukong/store/cassandra/streaming.rb
|
321
|
+
- lib/wukong/store/cassandra/struct_loader.rb
|
322
|
+
- lib/wukong/store/cassandra_model.rb
|
305
323
|
- lib/wukong/store/chh_chunked_flat_file_store.rb
|
306
324
|
- lib/wukong/store/chunked_flat_file_store.rb
|
307
325
|
- lib/wukong/store/conditional_store.rb
|
@@ -356,7 +374,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
356
374
|
requirements:
|
357
375
|
- - ">="
|
358
376
|
- !ruby/object:Gem::Version
|
359
|
-
hash: 3
|
360
377
|
segments:
|
361
378
|
- 0
|
362
379
|
version: "0"
|
@@ -365,7 +382,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
365
382
|
requirements:
|
366
383
|
- - ">="
|
367
384
|
- !ruby/object:Gem::Version
|
368
|
-
hash: 3
|
369
385
|
segments:
|
370
386
|
- 0
|
371
387
|
version: "0"
|
@@ -381,6 +397,9 @@ test_files:
|
|
381
397
|
- spec/wukong/encoding_spec.rb
|
382
398
|
- spec/wukong/script_spec.rb
|
383
399
|
- examples/binning_percentile_estimator.rb
|
400
|
+
- examples/cassandra_streaming/avromapper.rb
|
401
|
+
- examples/cassandra_streaming/cassandra_random_partitioner.rb
|
402
|
+
- examples/cassandra_streaming/struct_loader.rb
|
384
403
|
- examples/contrib/jeans/normalize.rb
|
385
404
|
- examples/contrib/jeans/sizes.rb
|
386
405
|
- examples/corpus/words_to_bigrams.rb
|