rbbt-util 5.13.23 → 5.13.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/persist/tsv.rb +23 -6
- data/lib/rbbt/persist/tsv/sharder.rb +82 -20
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +3 -9
- data/lib/rbbt/tsv/accessor.rb +5 -3
- data/lib/rbbt/tsv/parallel/traverse.rb +10 -10
- data/test/rbbt/persist/test_tsv.rb +3 -3
- data/test/rbbt/persist/tsv/test_sharder.rb +22 -10
- data/test/rbbt/test_tsv.rb +8 -0
- data/test/test_helper.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cb02f1584dee7188f5a2bc771f733eb81c46f1a
|
4
|
+
data.tar.gz: 47850a9f40b69fc8cb761dbefea3aec895c1c955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3459787e3d4aa401917919e6a70bbb4c314477dbce5f9251f0723dbc905af70170f0e844b6d905fe9fd86a232acd1cc98553d3ed84ebca91743a06f0149610c
|
7
|
+
data.tar.gz: 2028a7f0dc8de1adfb4a69e54f139d4a2a329c6e094b4043618792314874e7ee4da08b72327912800d899cca258cccbf015dee6f10f13ffabb9283132f823929
|
data/lib/rbbt/persist/tsv.rb
CHANGED
@@ -28,8 +28,6 @@ rescue Exception
|
|
28
28
|
Log.debug "The kyotocabinet gem could not be loaded. Persistance using this engine will fail."
|
29
29
|
end
|
30
30
|
|
31
|
-
require 'rbbt/persist/tsv/sharder'
|
32
|
-
|
33
31
|
module Persist
|
34
32
|
CONNECTIONS = {}
|
35
33
|
|
@@ -85,14 +83,23 @@ module Persist
|
|
85
83
|
|
86
84
|
if is_persisted?(path) and not persist_options[:update]
|
87
85
|
Log.debug "TSV persistence up-to-date: #{ path }"
|
88
|
-
|
86
|
+
if persist_options[:shard_function]
|
87
|
+
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
88
|
+
else
|
89
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
90
|
+
end
|
89
91
|
end
|
90
92
|
|
91
93
|
Misc.lock lock_filename do
|
92
94
|
begin
|
93
95
|
if is_persisted?(path) and not persist_options[:update]
|
94
96
|
Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
|
95
|
-
|
97
|
+
|
98
|
+
if persist_options[:shard_function]
|
99
|
+
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
100
|
+
else
|
101
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
102
|
+
end
|
96
103
|
end
|
97
104
|
|
98
105
|
FileUtils.rm path if File.exists? path
|
@@ -101,8 +108,15 @@ module Persist
|
|
101
108
|
|
102
109
|
tmp_path = path + '.persist'
|
103
110
|
|
104
|
-
data =
|
105
|
-
|
111
|
+
data = if persist_options[:shard_function]
|
112
|
+
open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
|
113
|
+
else
|
114
|
+
open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
|
115
|
+
end
|
116
|
+
|
117
|
+
if TSV === data and data.serializer.nil?
|
118
|
+
data.serializer = :type
|
119
|
+
end
|
106
120
|
|
107
121
|
data.write_and_read do
|
108
122
|
yield data
|
@@ -123,3 +137,6 @@ module Persist
|
|
123
137
|
end
|
124
138
|
end
|
125
139
|
end
|
140
|
+
|
141
|
+
require 'rbbt/persist/tsv/sharder'
|
142
|
+
|
data/lib/rbbt/persist/tsv/sharder.rb
CHANGED
@@ -1,23 +1,66 @@
|
|
1
|
-
require 'rbbt-util'
|
2
|
-
|
3
1
|
module Persist
|
2
|
+
module SharderAdapter
|
3
|
+
def self.open(path, write, type=nil, &block)
|
4
|
+
|
5
|
+
database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
|
6
|
+
|
7
|
+
database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
|
8
|
+
|
9
|
+
database
|
10
|
+
end
|
11
|
+
|
12
|
+
end
|
13
|
+
|
4
14
|
class Sharder
|
5
|
-
attr_accessor :
|
15
|
+
attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
|
6
16
|
|
7
|
-
def initialize(
|
17
|
+
def initialize(persistence_path, write = false, db_type=nil, &block)
|
8
18
|
@shard_function = block
|
9
|
-
@
|
10
|
-
@databases = {}
|
11
|
-
@directory = directory
|
19
|
+
@persistence_path = Path.setup(persistence_path)
|
12
20
|
@mutex = Mutex.new
|
21
|
+
@writable = write
|
22
|
+
@db_type = db_type
|
23
|
+
|
24
|
+
if write
|
25
|
+
@databases = {}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def <<(key,value)
|
30
|
+
self[key] = value
|
31
|
+
end
|
32
|
+
|
33
|
+
def persistence_path=(path)
|
34
|
+
@persistence_path = path
|
35
|
+
end
|
36
|
+
|
37
|
+
def databases
|
38
|
+
@databases ||= begin
|
39
|
+
hash = {}
|
40
|
+
@persistence_path.glob('shard-*').each do |f|
|
41
|
+
shard = File.basename(f).match(/shard-(.*)/)[1]
|
42
|
+
hash[shard] = Persist.open_database(f, false, :clean, db_type)
|
43
|
+
end
|
44
|
+
hash
|
45
|
+
end
|
13
46
|
end
|
14
47
|
|
15
48
|
def database(key)
|
16
|
-
shard = shard_function.call(key)
|
17
|
-
databases
|
18
|
-
|
19
|
-
|
20
|
-
|
49
|
+
shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
|
50
|
+
if databases.include? shard
|
51
|
+
databases[shard]
|
52
|
+
else
|
53
|
+
database ||= begin
|
54
|
+
path = File.join(persistence_path, 'shard-' << shard.to_s)
|
55
|
+
(writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
|
56
|
+
end
|
57
|
+
if database
|
58
|
+
databases[shard] = database
|
59
|
+
else
|
60
|
+
Log.warn "Database #{ path } missing" if
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
end
|
21
64
|
end
|
22
65
|
|
23
66
|
MAX_CHAR = 255.chr
|
@@ -69,13 +112,17 @@ module Persist
|
|
69
112
|
end
|
70
113
|
|
71
114
|
def each
|
72
|
-
databases.each do |database|
|
115
|
+
databases.values.each do |database|
|
73
116
|
database.each do |k,v|
|
74
117
|
yield k, v
|
75
118
|
end
|
76
119
|
end
|
77
120
|
end
|
78
121
|
|
122
|
+
def include?(key)
|
123
|
+
self[key] != nil
|
124
|
+
end
|
125
|
+
|
79
126
|
def collect
|
80
127
|
res = []
|
81
128
|
each do |key, value|
|
@@ -89,7 +136,7 @@ module Persist
|
|
89
136
|
end
|
90
137
|
|
91
138
|
def write_and_read
|
92
|
-
lock_filename = Persist.persistence_path(File.join(
|
139
|
+
lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
|
93
140
|
Misc.lock(lock_filename) do
|
94
141
|
@mutex.synchronize do
|
95
142
|
write if @closed or not write?
|
@@ -104,7 +151,7 @@ module Persist
|
|
104
151
|
end
|
105
152
|
|
106
153
|
def write_and_close
|
107
|
-
lock_filename = Persist.persistence_path(File.join(
|
154
|
+
lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
|
108
155
|
Misc.lock(lock_filename) do
|
109
156
|
@mutex.synchronize do
|
110
157
|
write if @closed or not write?
|
@@ -137,15 +184,15 @@ module Persist
|
|
137
184
|
end
|
138
185
|
|
139
186
|
def keys
|
140
|
-
databases.values.collect{|d| d.keys }.flatten
|
187
|
+
databases.values.collect{|d| d.keys }.flatten - TSV::ENTRY_KEYS.to_a
|
141
188
|
end
|
142
189
|
|
143
|
-
def []=(key, value)
|
144
|
-
database(key)[
|
190
|
+
def []=(key, value, clean = false)
|
191
|
+
database(key).send(:[]=, key, value)
|
145
192
|
end
|
146
193
|
|
147
|
-
def [](key,
|
148
|
-
database(key)[key
|
194
|
+
def [](key, clean=false)
|
195
|
+
v = database(key).send(:[], key)
|
149
196
|
end
|
150
197
|
|
151
198
|
def <<(p)
|
@@ -165,4 +212,19 @@ module Persist
|
|
165
212
|
databases.values.each{|database| database.close }
|
166
213
|
end
|
167
214
|
end
|
215
|
+
|
216
|
+
def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
|
217
|
+
write = true unless File.exists? path
|
218
|
+
|
219
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
220
|
+
|
221
|
+
database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
|
222
|
+
|
223
|
+
unless serializer == :clean
|
224
|
+
TSV.setup database
|
225
|
+
database.serializer = serializer if serializer
|
226
|
+
end
|
227
|
+
|
228
|
+
database
|
229
|
+
end
|
168
230
|
end
|
data/lib/rbbt/persist/tsv/tokyocabinet.rb
CHANGED
@@ -5,8 +5,8 @@ module Persist
|
|
5
5
|
module TCAdapter
|
6
6
|
attr_accessor :persistence_path, :tokyocabinet_class, :closed, :writable, :mutex
|
7
7
|
|
8
|
-
def self.open(path, write, tokyocabinet_class = TokyoCabinet::HDB)
|
9
|
-
tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB"
|
8
|
+
def self.open(path, write, serializer, tokyocabinet_class = TokyoCabinet::HDB)
|
9
|
+
tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
|
10
10
|
tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
|
11
11
|
|
12
12
|
database = CONNECTIONS[path] ||= tokyocabinet_class.new
|
@@ -80,12 +80,6 @@ module Persist
|
|
80
80
|
def read?
|
81
81
|
! write?
|
82
82
|
end
|
83
|
-
#def each
|
84
|
-
# iterinit
|
85
|
-
# while key = iternext
|
86
|
-
# yield key, get(key)
|
87
|
-
# end
|
88
|
-
#end
|
89
83
|
|
90
84
|
def collect
|
91
85
|
res = []
|
@@ -164,7 +158,7 @@ module Persist
|
|
164
158
|
|
165
159
|
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
166
160
|
|
167
|
-
database = Persist::TCAdapter.open(path, write, tokyocabinet_class)
|
161
|
+
database = Persist::TCAdapter.open(path, write, serializer, tokyocabinet_class)
|
168
162
|
|
169
163
|
unless serializer == :clean
|
170
164
|
TSV.setup database
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -243,7 +243,7 @@ module TSV
|
|
243
243
|
next if ENTRY_KEYS.include? key
|
244
244
|
|
245
245
|
# TODO Update this to be more efficient
|
246
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
246
|
+
value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
247
247
|
|
248
248
|
# Annotated with Entity and NamedArray
|
249
249
|
if not @unnamed
|
@@ -417,13 +417,15 @@ module TSV
|
|
417
417
|
end
|
418
418
|
|
419
419
|
def namespace=(value)
|
420
|
-
self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value
|
420
|
+
self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value), true)
|
421
421
|
@namespace = value
|
422
422
|
@entity_options = nil
|
423
423
|
end
|
424
424
|
|
425
425
|
def fields=(value)
|
426
|
-
|
426
|
+
clean = true
|
427
|
+
value_ym = value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value)
|
428
|
+
self.send(:[]=, "__tsv_hash_fields", value_ym, clean)
|
427
429
|
@fields = value
|
428
430
|
@named_fields = nil
|
429
431
|
end
|
data/lib/rbbt/tsv/parallel/traverse.rb
CHANGED
@@ -54,7 +54,7 @@ module TSV
|
|
54
54
|
def self.report(msg, obj, into)
|
55
55
|
into = into[:into] if Hash === into and into.include? :into
|
56
56
|
|
57
|
-
Log.medium
|
57
|
+
Log.medium{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
|
58
58
|
end
|
59
59
|
|
60
60
|
#{{{ TRAVERSE OBJECTS
|
@@ -135,7 +135,7 @@ module TSV
|
|
135
135
|
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
136
136
|
if File === io and io.closed?
|
137
137
|
begin
|
138
|
-
Log.medium
|
138
|
+
Log.medium{"Rewinding stream #{stream_name(io)}"}
|
139
139
|
io.reopen io.filename, "r"
|
140
140
|
rescue
|
141
141
|
Log.exception $!
|
@@ -168,7 +168,7 @@ module TSV
|
|
168
168
|
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
169
169
|
if File === io and io.closed?
|
170
170
|
begin
|
171
|
-
Log.medium
|
171
|
+
Log.medium{"Rewinding stream #{stream_name(io)}"}
|
172
172
|
io.reopen io.filename, "r"
|
173
173
|
rescue
|
174
174
|
Log.exception $!
|
@@ -192,7 +192,7 @@ module TSV
|
|
192
192
|
options[:type] = :single
|
193
193
|
end
|
194
194
|
|
195
|
-
Log.medium
|
195
|
+
Log.medium{"Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
|
196
196
|
begin
|
197
197
|
case obj
|
198
198
|
when TSV
|
@@ -256,28 +256,28 @@ module TSV
|
|
256
256
|
raise "Unknown object for traversal: #{Misc.fingerprint obj }"
|
257
257
|
end
|
258
258
|
rescue IOError
|
259
|
-
Log.medium
|
259
|
+
Log.medium{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
|
260
260
|
stream = obj_stream(obj)
|
261
261
|
stream.abort if stream and stream.respond_to? :abort
|
262
262
|
stream = obj_stream(options[:into])
|
263
263
|
stream.abort if stream.respond_to? :abort
|
264
264
|
raise $!
|
265
265
|
rescue Errno::EPIPE
|
266
|
-
Log.medium
|
266
|
+
Log.medium{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
|
267
267
|
stream = obj_stream(obj)
|
268
268
|
stream.abort if stream and stream.respond_to? :abort
|
269
269
|
stream = obj_stream(options[:into])
|
270
270
|
stream.abort if stream.respond_to? :abort
|
271
271
|
raise $!
|
272
272
|
rescue Aborted
|
273
|
-
Log.medium
|
273
|
+
Log.medium{"Aborted traversing #{stream_name(obj)}"}
|
274
274
|
stream = obj_stream(obj)
|
275
275
|
stream.abort if stream and stream.respond_to? :abort
|
276
276
|
stream = obj_stream(options[:into])
|
277
277
|
stream.abort if stream.respond_to? :abort
|
278
|
-
Log.medium
|
278
|
+
Log.medium{"Aborted traversing 2 #{stream_name(obj)}"}
|
279
279
|
rescue Exception
|
280
|
-
Log.medium
|
280
|
+
Log.medium{"Exception traversing #{stream_name(obj)}"}
|
281
281
|
Log.exception $!
|
282
282
|
stream = obj_stream(obj)
|
283
283
|
stream.abort if stream and stream.respond_to? :abort
|
@@ -335,7 +335,7 @@ module TSV
|
|
335
335
|
q.clean
|
336
336
|
end
|
337
337
|
rescue Interrupt, Aborted
|
338
|
-
Log.medium
|
338
|
+
Log.medium{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
|
339
339
|
q.abort
|
340
340
|
stream = obj_stream(obj)
|
341
341
|
stream.abort if stream.respond_to? :abort
|
data/test/rbbt/persist/test_tsv.rb
CHANGED
@@ -24,13 +24,13 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
24
24
|
Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
|
25
25
|
db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
|
26
26
|
end
|
27
|
-
|
27
|
+
_test = db.keys.sort{rand}[1..100000]
|
28
28
|
Misc.benchmark(5, "Access #{test.length} random entries") do
|
29
|
-
|
29
|
+
_test.each do |k| db[k] end
|
30
30
|
end
|
31
31
|
Log.info "Profiling access to #{test.length} random entries"
|
32
32
|
Misc.profile :min_percent => 0.1 do
|
33
|
-
|
33
|
+
_test.each do |k| db[k] end
|
34
34
|
end
|
35
35
|
assert_equal "1:10611:G", db["rs189107123"]
|
36
36
|
end
|
data/test/rbbt/persist/tsv/test_sharder.rb
CHANGED
@@ -1,26 +1,38 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
2
|
require 'test/unit'
|
3
|
+
require 'rbbt-util'
|
3
4
|
require 'rbbt/persist/tsv'
|
4
5
|
|
5
6
|
class TestSharder < Test::Unit::TestCase
|
6
7
|
def test_shard
|
7
8
|
TmpFile.with_file do |dir|
|
8
|
-
|
9
|
+
shard_function = Proc.new do |key|
|
9
10
|
key[-1]
|
10
11
|
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
size.times do |v|
|
17
|
-
keys << v.to_s
|
18
|
-
sharder[v.to_s] = [v, v*2]
|
19
|
-
end
|
13
|
+
size = 10
|
14
|
+
sharder = Persist.persist_tsv(nil, "ShardTest", {}, :file => dir, :shard_function => shard_function, :persist => true, :serializer => :float_array) do |db|
|
15
|
+
size.times do |v|
|
16
|
+
db[v.to_s] = [v, v*2]
|
20
17
|
end
|
18
|
+
db
|
19
|
+
end
|
20
|
+
assert_equal dir, sharder.persistence_path
|
21
|
+
assert_equal size, sharder.keys.length
|
22
|
+
|
23
|
+
assert_equal [2,4], sharder["2"]
|
24
|
+
count = 0
|
25
|
+
sharder.through do |k,v|
|
26
|
+
count += 1
|
27
|
+
end
|
28
|
+
assert_equal count, size
|
21
29
|
|
22
|
-
|
30
|
+
sharder = Persist::Sharder.new dir do |key|
|
31
|
+
key[-1]
|
23
32
|
end
|
33
|
+
|
34
|
+
assert_equal size, sharder.keys.length
|
35
|
+
|
24
36
|
end
|
25
37
|
end
|
26
38
|
end
|
data/test/rbbt/test_tsv.rb
CHANGED
@@ -519,4 +519,12 @@ row2 A AA AAA
|
|
519
519
|
end
|
520
520
|
|
521
521
|
end
|
522
|
+
|
523
|
+
def test_shard
|
524
|
+
shard_function = Proc.new do |key|
|
525
|
+
key[-1]
|
526
|
+
end
|
527
|
+
tsv = datafile_test('identifiers').tsv :persist => true, :shard_function => shard_function
|
528
|
+
assert_equal 10000, tsv.keys.length + 2
|
529
|
+
end
|
522
530
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.
|
4
|
+
version: 5.13.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|