rbbt-util 5.13.22 → 5.13.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist/tsv/sharder.rb +168 -0
- data/lib/rbbt/persist/tsv.rb +2 -0
- data/lib/rbbt/tsv/parallel/traverse.rb +31 -4
- data/test/rbbt/persist/tsv/test_sharder.rb +27 -0
- data/test/rbbt/tsv/parallel/test_traverse.rb +10 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 716be8fb2d3ec4a9e9cc76482f96771856976191
|
4
|
+
data.tar.gz: 958406db01b705360855b2a6bff88b0e0ff804b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3e9739aba63e67c6507ac8e7b31393d6eaae60dde59e5c0bea00faf3132b732a9f5248cb52e3d552c57834cd157258c2799f9993c2caff35d0a65f2c723548b
|
7
|
+
data.tar.gz: 22aa1b9d063dbe6d6150e47e9949dd863ed0c7ba8cdaa90fbb1b61ea4a588ed696694a25daf5f1dfabf54e130339083e55c684a7e78ff37625596f92a8643199
|
data/lib/rbbt/persist/tsv/sharder.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
|
3
|
+
module Persist
|
4
|
+
class Sharder
|
5
|
+
attr_accessor :directory, :params, :shard_function, :databases, :closed, :writable, :mutex
|
6
|
+
|
7
|
+
def initialize(directory, *rest, &block)
|
8
|
+
@shard_function = block
|
9
|
+
@params = rest
|
10
|
+
@databases = {}
|
11
|
+
@directory = directory
|
12
|
+
@mutex = Mutex.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def database(key)
|
16
|
+
shard = shard_function.call(key)
|
17
|
+
databases[shard] ||= begin
|
18
|
+
path = File.join(directory, 'shard-' << shard.to_s)
|
19
|
+
Persist.open_database(path, *params)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
MAX_CHAR = 255.chr
|
24
|
+
|
25
|
+
def prefix(key)
|
26
|
+
range(key, 1, key + MAX_CHAR, 1)
|
27
|
+
end
|
28
|
+
|
29
|
+
def get_prefix(key)
|
30
|
+
keys = prefix(key)
|
31
|
+
select(:key => keys)
|
32
|
+
end
|
33
|
+
|
34
|
+
def closed?
|
35
|
+
@closed
|
36
|
+
end
|
37
|
+
|
38
|
+
def close
|
39
|
+
@closed = true
|
40
|
+
super
|
41
|
+
end
|
42
|
+
|
43
|
+
def read(force = false)
|
44
|
+
return if not write? and not closed and not force
|
45
|
+
self.close
|
46
|
+
databases.each{|d| d.read }
|
47
|
+
@writable = false
|
48
|
+
@closed = false
|
49
|
+
self
|
50
|
+
end
|
51
|
+
|
52
|
+
def write(force = true)
|
53
|
+
return if write? and not closed and not force
|
54
|
+
self.close
|
55
|
+
|
56
|
+
databases.each{|d| d.write }
|
57
|
+
|
58
|
+
@writable = true
|
59
|
+
@closed = false
|
60
|
+
self
|
61
|
+
end
|
62
|
+
|
63
|
+
def write?
|
64
|
+
@writable
|
65
|
+
end
|
66
|
+
|
67
|
+
def read?
|
68
|
+
! write?
|
69
|
+
end
|
70
|
+
|
71
|
+
def each
|
72
|
+
databases.each do |database|
|
73
|
+
database.each do |k,v|
|
74
|
+
yield k, v
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def collect
|
80
|
+
res = []
|
81
|
+
each do |key, value|
|
82
|
+
res << if block_given?
|
83
|
+
yield key, value
|
84
|
+
else
|
85
|
+
[key, value]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
res
|
89
|
+
end
|
90
|
+
|
91
|
+
def write_and_read
|
92
|
+
lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
|
93
|
+
Misc.lock(lock_filename) do
|
94
|
+
@mutex.synchronize do
|
95
|
+
write if @closed or not write?
|
96
|
+
res = begin
|
97
|
+
yield
|
98
|
+
ensure
|
99
|
+
read
|
100
|
+
end
|
101
|
+
res
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def write_and_close
|
107
|
+
lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
|
108
|
+
Misc.lock(lock_filename) do
|
109
|
+
@mutex.synchronize do
|
110
|
+
write if @closed or not write?
|
111
|
+
res = begin
|
112
|
+
yield
|
113
|
+
ensure
|
114
|
+
close
|
115
|
+
end
|
116
|
+
res
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def read_and_close
|
122
|
+
@mutex.synchronize do
|
123
|
+
read if @closed or not read?
|
124
|
+
res = begin
|
125
|
+
yield
|
126
|
+
ensure
|
127
|
+
close
|
128
|
+
end
|
129
|
+
res
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def merge!(hash)
|
134
|
+
hash.each do |key,values|
|
135
|
+
self[key] = values
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def keys
|
140
|
+
databases.values.collect{|d| d.keys }.flatten
|
141
|
+
end
|
142
|
+
|
143
|
+
def []=(key, value)
|
144
|
+
database(key)[key] = value
|
145
|
+
end
|
146
|
+
|
147
|
+
def [](key, value)
|
148
|
+
database(key)[key]
|
149
|
+
end
|
150
|
+
|
151
|
+
def <<(p)
|
152
|
+
return if p.nil?
|
153
|
+
self[p.first] = p.last
|
154
|
+
end
|
155
|
+
|
156
|
+
def write
|
157
|
+
databases.values.each{|database| database.write }
|
158
|
+
end
|
159
|
+
|
160
|
+
def read
|
161
|
+
databases.values.each{|database| database.read }
|
162
|
+
end
|
163
|
+
|
164
|
+
def close
|
165
|
+
databases.values.each{|database| database.close }
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
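data/lib/rbbt/persist/tsv.rb
CHANGED
[hunk (+2 -0) for this file is missing from this extract]
data/lib/rbbt/tsv/parallel/traverse.rb
CHANGED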
@@ -1,4 +1,7 @@
|
|
1
1
|
module TSV
|
2
|
+
|
3
|
+
module MultipleResult; end
|
4
|
+
|
2
5
|
def self.obj_stream(obj)
|
3
6
|
case obj
|
4
7
|
when (defined? Step and Step)
|
@@ -15,7 +18,7 @@ module TSV
|
|
15
18
|
def self.guess_max(obj)
|
16
19
|
begin
|
17
20
|
case obj
|
18
|
-
when Step
|
21
|
+
when (defined? Step and Step)
|
19
22
|
if obj.done?
|
20
23
|
CMD.cmd("wc -l '#{obj.path.find}'").read.to_i
|
21
24
|
else
|
@@ -27,8 +30,15 @@ module TSV
|
|
27
30
|
CMD.cmd("wc -l '#{obj.filename}'").read.to_i
|
28
31
|
when Path
|
29
32
|
CMD.cmd("wc -l '#{obj.find}'").read.to_i
|
33
|
+
when String
|
34
|
+
if File.exists? obj
|
35
|
+
CMD.cmd("wc -l '#{obj}'").read.to_i
|
36
|
+
else
|
37
|
+
nil
|
38
|
+
end
|
30
39
|
end
|
31
40
|
rescue Exception
|
41
|
+
Log.exception $!
|
32
42
|
nil
|
33
43
|
end
|
34
44
|
end
|
@@ -69,6 +79,7 @@ module TSV
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
end
|
82
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
72
83
|
join.call if join
|
73
84
|
end
|
74
85
|
|
@@ -92,6 +103,7 @@ module TSV
|
|
92
103
|
end
|
93
104
|
end
|
94
105
|
end
|
106
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
95
107
|
join.call if join
|
96
108
|
end
|
97
109
|
|
@@ -115,6 +127,7 @@ module TSV
|
|
115
127
|
end
|
116
128
|
end
|
117
129
|
end
|
130
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
118
131
|
join.call if join
|
119
132
|
end
|
120
133
|
|
@@ -132,14 +145,22 @@ module TSV
|
|
132
145
|
|
133
146
|
if callback
|
134
147
|
while line = io.gets
|
135
|
-
|
136
|
-
|
148
|
+
begin
|
149
|
+
callback.call yield line.strip
|
150
|
+
ensure
|
151
|
+
bar.tick if bar
|
152
|
+
end
|
137
153
|
end
|
138
154
|
else
|
139
155
|
while line = io.gets
|
140
|
-
|
156
|
+
begin
|
157
|
+
yield line.strip
|
158
|
+
ensure
|
159
|
+
bar.tick if bar
|
160
|
+
end
|
141
161
|
end
|
142
162
|
end
|
163
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
143
164
|
join.call if join
|
144
165
|
end
|
145
166
|
|
@@ -332,6 +353,12 @@ module TSV
|
|
332
353
|
end
|
333
354
|
|
334
355
|
def self.store_into(store, value)
|
356
|
+
if MultipleResult === value
|
357
|
+
value.each do |v|
|
358
|
+
store_into store, v
|
359
|
+
end
|
360
|
+
return
|
361
|
+
end
|
335
362
|
begin
|
336
363
|
case store
|
337
364
|
when Hash
|
data/test/rbbt/persist/tsv/test_sharder.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/persist/tsv'
|
4
|
+
|
5
|
+
class TestSharder < Test::Unit::TestCase
|
6
|
+
def test_shard
|
7
|
+
TmpFile.with_file do |dir|
|
8
|
+
sharder = Persist::Sharder.new dir, true, :float_array, 'HDB' do |key|
|
9
|
+
key[-1]
|
10
|
+
end
|
11
|
+
|
12
|
+
keys = []
|
13
|
+
size = 1_000_000
|
14
|
+
Misc.benchmark(2) do
|
15
|
+
sharder.write_and_read do
|
16
|
+
size.times do |v|
|
17
|
+
keys << v.to_s
|
18
|
+
sharder[v.to_s] = [v, v*2]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
assert_equal size, sharder.keys.length
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
data/test/rbbt/tsv/parallel/test_traverse.rb
CHANGED
@@ -366,4 +366,14 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
366
366
|
end
|
367
367
|
assert_equal size, stream.read.split("\n").length
|
368
368
|
end
|
369
|
+
|
370
|
+
def test_store_multiple
|
371
|
+
size = 1000
|
372
|
+
array = (1..size).to_a.collect{|n| n.to_s}
|
373
|
+
stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
|
374
|
+
sleep 0.01
|
375
|
+
[e,e+".alt"].extend TSV::MultipleResult
|
376
|
+
end
|
377
|
+
assert_equal size*2, stream.read.split("\n").length
|
378
|
+
end
|
369
379
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.22
|
4
|
+
version: 5.13.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- lib/rbbt/persist/tsv/kyotocabinet.rb
|
137
137
|
- lib/rbbt/persist/tsv/leveldb.rb
|
138
138
|
- lib/rbbt/persist/tsv/lmdb.rb
|
139
|
+
- lib/rbbt/persist/tsv/sharder.rb
|
139
140
|
- lib/rbbt/persist/tsv/tokyocabinet.rb
|
140
141
|
- lib/rbbt/resource.rb
|
141
142
|
- lib/rbbt/resource/path.rb
|
@@ -278,6 +279,7 @@ files:
|
|
278
279
|
- test/rbbt/persist/tsv/test_kyotocabinet.rb
|
279
280
|
- test/rbbt/persist/tsv/test_leveldb.rb
|
280
281
|
- test/rbbt/persist/tsv/test_lmdb.rb
|
282
|
+
- test/rbbt/persist/tsv/test_sharder.rb
|
281
283
|
- test/rbbt/persist/tsv/test_tokyocabinet.rb
|
282
284
|
- test/rbbt/resource/test_path.rb
|
283
285
|
- test/rbbt/test_annotations.rb
|
@@ -395,6 +397,7 @@ test_files:
|
|
395
397
|
- test/rbbt/persist/test_tsv.rb
|
396
398
|
- test/rbbt/persist/tsv/test_lmdb.rb
|
397
399
|
- test/rbbt/persist/tsv/test_kyotocabinet.rb
|
400
|
+
- test/rbbt/persist/tsv/test_sharder.rb
|
398
401
|
- test/rbbt/persist/tsv/test_cdb.rb
|
399
402
|
- test/rbbt/persist/tsv/test_tokyocabinet.rb
|
400
403
|
- test/rbbt/persist/tsv/test_leveldb.rb
|