rbbt-util 5.13.23 → 5.13.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist/tsv.rb +23 -6
- data/lib/rbbt/persist/tsv/sharder.rb +82 -20
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +3 -9
- data/lib/rbbt/tsv/accessor.rb +5 -3
- data/lib/rbbt/tsv/parallel/traverse.rb +10 -10
- data/test/rbbt/persist/test_tsv.rb +3 -3
- data/test/rbbt/persist/tsv/test_sharder.rb +22 -10
- data/test/rbbt/test_tsv.rb +8 -0
- data/test/test_helper.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cb02f1584dee7188f5a2bc771f733eb81c46f1a
|
4
|
+
data.tar.gz: 47850a9f40b69fc8cb761dbefea3aec895c1c955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3459787e3d4aa401917919e6a70bbb4c314477dbce5f9251f0723dbc905af70170f0e844b6d905fe9fd86a232acd1cc98553d3ed84ebca91743a06f0149610c
|
7
|
+
data.tar.gz: 2028a7f0dc8de1adfb4a69e54f139d4a2a329c6e094b4043618792314874e7ee4da08b72327912800d899cca258cccbf015dee6f10f13ffabb9283132f823929
|
data/lib/rbbt/persist/tsv.rb
CHANGED
@@ -28,8 +28,6 @@ rescue Exception
|
|
28
28
|
Log.debug "The kyotocabinet gem could not be loaded. Persistance using this engine will fail."
|
29
29
|
end
|
30
30
|
|
31
|
-
require 'rbbt/persist/tsv/sharder'
|
32
|
-
|
33
31
|
module Persist
|
34
32
|
CONNECTIONS = {}
|
35
33
|
|
@@ -85,14 +83,23 @@ module Persist
|
|
85
83
|
|
86
84
|
if is_persisted?(path) and not persist_options[:update]
|
87
85
|
Log.debug "TSV persistence up-to-date: #{ path }"
|
88
|
-
|
86
|
+
if persist_options[:shard_function]
|
87
|
+
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
88
|
+
else
|
89
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
90
|
+
end
|
89
91
|
end
|
90
92
|
|
91
93
|
Misc.lock lock_filename do
|
92
94
|
begin
|
93
95
|
if is_persisted?(path) and not persist_options[:update]
|
94
96
|
Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
|
95
|
-
|
97
|
+
|
98
|
+
if persist_options[:shard_function]
|
99
|
+
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
100
|
+
else
|
101
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
102
|
+
end
|
96
103
|
end
|
97
104
|
|
98
105
|
FileUtils.rm path if File.exists? path
|
@@ -101,8 +108,15 @@ module Persist
|
|
101
108
|
|
102
109
|
tmp_path = path + '.persist'
|
103
110
|
|
104
|
-
data =
|
105
|
-
|
111
|
+
data = if persist_options[:shard_function]
|
112
|
+
open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
|
113
|
+
else
|
114
|
+
open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
|
115
|
+
end
|
116
|
+
|
117
|
+
if TSV === data and data.serializer.nil?
|
118
|
+
data.serializer = :type
|
119
|
+
end
|
106
120
|
|
107
121
|
data.write_and_read do
|
108
122
|
yield data
|
@@ -123,3 +137,6 @@ module Persist
|
|
123
137
|
end
|
124
138
|
end
|
125
139
|
end
|
140
|
+
|
141
|
+
require 'rbbt/persist/tsv/sharder'
|
142
|
+
|
@@ -1,23 +1,66 @@
|
|
1
|
-
require 'rbbt-util'
|
2
|
-
|
3
1
|
module Persist
|
2
|
+
module SharderAdapter
|
3
|
+
def self.open(path, write, type=nil, &block)
|
4
|
+
|
5
|
+
database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
|
6
|
+
|
7
|
+
database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
|
8
|
+
|
9
|
+
database
|
10
|
+
end
|
11
|
+
|
12
|
+
end
|
13
|
+
|
4
14
|
class Sharder
|
5
|
-
attr_accessor :
|
15
|
+
attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
|
6
16
|
|
7
|
-
def initialize(
|
17
|
+
def initialize(persistence_path, write = false, db_type=nil, &block)
|
8
18
|
@shard_function = block
|
9
|
-
@
|
10
|
-
@databases = {}
|
11
|
-
@directory = directory
|
19
|
+
@persistence_path = Path.setup(persistence_path)
|
12
20
|
@mutex = Mutex.new
|
21
|
+
@writable = write
|
22
|
+
@db_type = db_type
|
23
|
+
|
24
|
+
if write
|
25
|
+
@databases = {}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def <<(key,value)
|
30
|
+
self[key] = value
|
31
|
+
end
|
32
|
+
|
33
|
+
def persistence_path=(path)
|
34
|
+
@persistence_path = path
|
35
|
+
end
|
36
|
+
|
37
|
+
def databases
|
38
|
+
@databases ||= begin
|
39
|
+
hash = {}
|
40
|
+
@persistence_path.glob('shard-*').each do |f|
|
41
|
+
shard = File.basename(f).match(/shard-(.*)/)[1]
|
42
|
+
hash[shard] = Persist.open_database(f, false, :clean, db_type)
|
43
|
+
end
|
44
|
+
hash
|
45
|
+
end
|
13
46
|
end
|
14
47
|
|
15
48
|
def database(key)
|
16
|
-
shard = shard_function.call(key)
|
17
|
-
databases
|
18
|
-
|
19
|
-
|
20
|
-
|
49
|
+
shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
|
50
|
+
if databases.include? shard
|
51
|
+
databases[shard]
|
52
|
+
else
|
53
|
+
database ||= begin
|
54
|
+
path = File.join(persistence_path, 'shard-' << shard.to_s)
|
55
|
+
(writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
|
56
|
+
end
|
57
|
+
if database
|
58
|
+
databases[shard] = database
|
59
|
+
else
|
60
|
+
Log.warn "Database #{ path } missing" if
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
end
|
21
64
|
end
|
22
65
|
|
23
66
|
MAX_CHAR = 255.chr
|
@@ -69,13 +112,17 @@ module Persist
|
|
69
112
|
end
|
70
113
|
|
71
114
|
def each
|
72
|
-
databases.each do |database|
|
115
|
+
databases.values.each do |database|
|
73
116
|
database.each do |k,v|
|
74
117
|
yield k, v
|
75
118
|
end
|
76
119
|
end
|
77
120
|
end
|
78
121
|
|
122
|
+
def include?(key)
|
123
|
+
self[key] != nil
|
124
|
+
end
|
125
|
+
|
79
126
|
def collect
|
80
127
|
res = []
|
81
128
|
each do |key, value|
|
@@ -89,7 +136,7 @@ module Persist
|
|
89
136
|
end
|
90
137
|
|
91
138
|
def write_and_read
|
92
|
-
lock_filename = Persist.persistence_path(File.join(
|
139
|
+
lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
|
93
140
|
Misc.lock(lock_filename) do
|
94
141
|
@mutex.synchronize do
|
95
142
|
write if @closed or not write?
|
@@ -104,7 +151,7 @@ module Persist
|
|
104
151
|
end
|
105
152
|
|
106
153
|
def write_and_close
|
107
|
-
lock_filename = Persist.persistence_path(File.join(
|
154
|
+
lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
|
108
155
|
Misc.lock(lock_filename) do
|
109
156
|
@mutex.synchronize do
|
110
157
|
write if @closed or not write?
|
@@ -137,15 +184,15 @@ module Persist
|
|
137
184
|
end
|
138
185
|
|
139
186
|
def keys
|
140
|
-
databases.values.collect{|d| d.keys }.flatten
|
187
|
+
databases.values.collect{|d| d.keys }.flatten - TSV::ENTRY_KEYS.to_a
|
141
188
|
end
|
142
189
|
|
143
|
-
def []=(key, value)
|
144
|
-
database(key)[
|
190
|
+
def []=(key, value, clean = false)
|
191
|
+
database(key).send(:[]=, key, value)
|
145
192
|
end
|
146
193
|
|
147
|
-
def [](key,
|
148
|
-
database(key)[key
|
194
|
+
def [](key, clean=false)
|
195
|
+
v = database(key).send(:[], key)
|
149
196
|
end
|
150
197
|
|
151
198
|
def <<(p)
|
@@ -165,4 +212,19 @@ module Persist
|
|
165
212
|
databases.values.each{|database| database.close }
|
166
213
|
end
|
167
214
|
end
|
215
|
+
|
216
|
+
def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
|
217
|
+
write = true unless File.exists? path
|
218
|
+
|
219
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
220
|
+
|
221
|
+
database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
|
222
|
+
|
223
|
+
unless serializer == :clean
|
224
|
+
TSV.setup database
|
225
|
+
database.serializer = serializer if serializer
|
226
|
+
end
|
227
|
+
|
228
|
+
database
|
229
|
+
end
|
168
230
|
end
|
@@ -5,8 +5,8 @@ module Persist
|
|
5
5
|
module TCAdapter
|
6
6
|
attr_accessor :persistence_path, :tokyocabinet_class, :closed, :writable, :mutex
|
7
7
|
|
8
|
-
def self.open(path, write, tokyocabinet_class = TokyoCabinet::HDB)
|
9
|
-
tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB"
|
8
|
+
def self.open(path, write, serializer, tokyocabinet_class = TokyoCabinet::HDB)
|
9
|
+
tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
|
10
10
|
tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
|
11
11
|
|
12
12
|
database = CONNECTIONS[path] ||= tokyocabinet_class.new
|
@@ -80,12 +80,6 @@ module Persist
|
|
80
80
|
def read?
|
81
81
|
! write?
|
82
82
|
end
|
83
|
-
#def each
|
84
|
-
# iterinit
|
85
|
-
# while key = iternext
|
86
|
-
# yield key, get(key)
|
87
|
-
# end
|
88
|
-
#end
|
89
83
|
|
90
84
|
def collect
|
91
85
|
res = []
|
@@ -164,7 +158,7 @@ module Persist
|
|
164
158
|
|
165
159
|
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
166
160
|
|
167
|
-
database = Persist::TCAdapter.open(path, write, tokyocabinet_class)
|
161
|
+
database = Persist::TCAdapter.open(path, write, serializer, tokyocabinet_class)
|
168
162
|
|
169
163
|
unless serializer == :clean
|
170
164
|
TSV.setup database
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -243,7 +243,7 @@ module TSV
|
|
243
243
|
next if ENTRY_KEYS.include? key
|
244
244
|
|
245
245
|
# TODO Update this to be more efficient
|
246
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
246
|
+
value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
247
247
|
|
248
248
|
# Annotated with Entity and NamedArray
|
249
249
|
if not @unnamed
|
@@ -417,13 +417,15 @@ module TSV
|
|
417
417
|
end
|
418
418
|
|
419
419
|
def namespace=(value)
|
420
|
-
self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value
|
420
|
+
self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value), true)
|
421
421
|
@namespace = value
|
422
422
|
@entity_options = nil
|
423
423
|
end
|
424
424
|
|
425
425
|
def fields=(value)
|
426
|
-
|
426
|
+
clean = true
|
427
|
+
value_ym = value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value)
|
428
|
+
self.send(:[]=, "__tsv_hash_fields", value_ym, clean)
|
427
429
|
@fields = value
|
428
430
|
@named_fields = nil
|
429
431
|
end
|
@@ -54,7 +54,7 @@ module TSV
|
|
54
54
|
def self.report(msg, obj, into)
|
55
55
|
into = into[:into] if Hash === into and into.include? :into
|
56
56
|
|
57
|
-
Log.medium
|
57
|
+
Log.medium{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
|
58
58
|
end
|
59
59
|
|
60
60
|
#{{{ TRAVERSE OBJECTS
|
@@ -135,7 +135,7 @@ module TSV
|
|
135
135
|
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
136
136
|
if File === io and io.closed?
|
137
137
|
begin
|
138
|
-
Log.medium
|
138
|
+
Log.medium{"Rewinding stream #{stream_name(io)}"}
|
139
139
|
io.reopen io.filename, "r"
|
140
140
|
rescue
|
141
141
|
Log.exception $!
|
@@ -168,7 +168,7 @@ module TSV
|
|
168
168
|
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
169
169
|
if File === io and io.closed?
|
170
170
|
begin
|
171
|
-
Log.medium
|
171
|
+
Log.medium{"Rewinding stream #{stream_name(io)}"}
|
172
172
|
io.reopen io.filename, "r"
|
173
173
|
rescue
|
174
174
|
Log.exception $!
|
@@ -192,7 +192,7 @@ module TSV
|
|
192
192
|
options[:type] = :single
|
193
193
|
end
|
194
194
|
|
195
|
-
Log.medium
|
195
|
+
Log.medium{"Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
|
196
196
|
begin
|
197
197
|
case obj
|
198
198
|
when TSV
|
@@ -256,28 +256,28 @@ module TSV
|
|
256
256
|
raise "Unknown object for traversal: #{Misc.fingerprint obj }"
|
257
257
|
end
|
258
258
|
rescue IOError
|
259
|
-
Log.medium
|
259
|
+
Log.medium{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
|
260
260
|
stream = obj_stream(obj)
|
261
261
|
stream.abort if stream and stream.respond_to? :abort
|
262
262
|
stream = obj_stream(options[:into])
|
263
263
|
stream.abort if stream.respond_to? :abort
|
264
264
|
raise $!
|
265
265
|
rescue Errno::EPIPE
|
266
|
-
Log.medium
|
266
|
+
Log.medium{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
|
267
267
|
stream = obj_stream(obj)
|
268
268
|
stream.abort if stream and stream.respond_to? :abort
|
269
269
|
stream = obj_stream(options[:into])
|
270
270
|
stream.abort if stream.respond_to? :abort
|
271
271
|
raise $!
|
272
272
|
rescue Aborted
|
273
|
-
Log.medium
|
273
|
+
Log.medium{"Aborted traversing #{stream_name(obj)}"}
|
274
274
|
stream = obj_stream(obj)
|
275
275
|
stream.abort if stream and stream.respond_to? :abort
|
276
276
|
stream = obj_stream(options[:into])
|
277
277
|
stream.abort if stream.respond_to? :abort
|
278
|
-
Log.medium
|
278
|
+
Log.medium{"Aborted traversing 2 #{stream_name(obj)}"}
|
279
279
|
rescue Exception
|
280
|
-
Log.medium
|
280
|
+
Log.medium{"Exception traversing #{stream_name(obj)}"}
|
281
281
|
Log.exception $!
|
282
282
|
stream = obj_stream(obj)
|
283
283
|
stream.abort if stream and stream.respond_to? :abort
|
@@ -335,7 +335,7 @@ module TSV
|
|
335
335
|
q.clean
|
336
336
|
end
|
337
337
|
rescue Interrupt, Aborted
|
338
|
-
Log.medium
|
338
|
+
Log.medium{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
|
339
339
|
q.abort
|
340
340
|
stream = obj_stream(obj)
|
341
341
|
stream.abort if stream.respond_to? :abort
|
@@ -24,13 +24,13 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
24
24
|
Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
|
25
25
|
db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
|
26
26
|
end
|
27
|
-
|
27
|
+
_test = db.keys.sort{rand}[1..100000]
|
28
28
|
Misc.benchmark(5, "Access #{test.length} random entries") do
|
29
|
-
|
29
|
+
_test.each do |k| db[k] end
|
30
30
|
end
|
31
31
|
Log.info "Profiling access to #{test.length} random entries"
|
32
32
|
Misc.profile :min_percent => 0.1 do
|
33
|
-
|
33
|
+
_test.each do |k| db[k] end
|
34
34
|
end
|
35
35
|
assert_equal "1:10611:G", db["rs189107123"]
|
36
36
|
end
|
@@ -1,26 +1,38 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
2
|
require 'test/unit'
|
3
|
+
require 'rbbt-util'
|
3
4
|
require 'rbbt/persist/tsv'
|
4
5
|
|
5
6
|
class TestSharder < Test::Unit::TestCase
|
6
7
|
def test_shard
|
7
8
|
TmpFile.with_file do |dir|
|
8
|
-
|
9
|
+
shard_function = Proc.new do |key|
|
9
10
|
key[-1]
|
10
11
|
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
size.times do |v|
|
17
|
-
keys << v.to_s
|
18
|
-
sharder[v.to_s] = [v, v*2]
|
19
|
-
end
|
13
|
+
size = 10
|
14
|
+
sharder = Persist.persist_tsv(nil, "ShardTest", {}, :file => dir, :shard_function => shard_function, :persist => true, :serializer => :float_array) do |db|
|
15
|
+
size.times do |v|
|
16
|
+
db[v.to_s] = [v, v*2]
|
20
17
|
end
|
18
|
+
db
|
19
|
+
end
|
20
|
+
assert_equal dir, sharder.persistence_path
|
21
|
+
assert_equal size, sharder.keys.length
|
22
|
+
|
23
|
+
assert_equal [2,4], sharder["2"]
|
24
|
+
count = 0
|
25
|
+
sharder.through do |k,v|
|
26
|
+
count += 1
|
27
|
+
end
|
28
|
+
assert_equal count, size
|
21
29
|
|
22
|
-
|
30
|
+
sharder = Persist::Sharder.new dir do |key|
|
31
|
+
key[-1]
|
23
32
|
end
|
33
|
+
|
34
|
+
assert_equal size, sharder.keys.length
|
35
|
+
|
24
36
|
end
|
25
37
|
end
|
26
38
|
end
|
data/test/rbbt/test_tsv.rb
CHANGED
@@ -519,4 +519,12 @@ row2 A AA AAA
|
|
519
519
|
end
|
520
520
|
|
521
521
|
end
|
522
|
+
|
523
|
+
def test_shard
|
524
|
+
shard_function = Proc.new do |key|
|
525
|
+
key[-1]
|
526
|
+
end
|
527
|
+
tsv = datafile_test('identifiers').tsv :persist => true, :shard_function => shard_function
|
528
|
+
assert_equal 10000, tsv.keys.length + 2
|
529
|
+
end
|
522
530
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.
|
4
|
+
version: 5.13.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|