rbbt-util 5.13.22 → 5.13.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/persist/tsv/sharder.rb +168 -0
- data/lib/rbbt/persist/tsv.rb +2 -0
- data/lib/rbbt/tsv/parallel/traverse.rb +31 -4
- data/test/rbbt/persist/tsv/test_sharder.rb +27 -0
- data/test/rbbt/tsv/parallel/test_traverse.rb +10 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 716be8fb2d3ec4a9e9cc76482f96771856976191
|
4
|
+
data.tar.gz: 958406db01b705360855b2a6bff88b0e0ff804b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3e9739aba63e67c6507ac8e7b31393d6eaae60dde59e5c0bea00faf3132b732a9f5248cb52e3d552c57834cd157258c2799f9993c2caff35d0a65f2c723548b
|
7
|
+
data.tar.gz: 22aa1b9d063dbe6d6150e47e9949dd863ed0c7ba8cdaa90fbb1b61ea4a588ed696694a25daf5f1dfabf54e130339083e55c684a7e78ff37625596f92a8643199
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
|
3
|
+
module Persist
  # Key/value store that spreads its entries over several databases
  # ("shards") kept under one directory.
  #
  # The block given to +new+ maps a key to a shard identifier. The database
  # for a shard is opened on first use through Persist.open_database and
  # cached in +databases+ under that identifier.
  class Sharder
    attr_accessor :directory, :params, :shard_function, :databases, :closed, :writable, :mutex

    MAX_CHAR = 255.chr.freeze

    # directory:: directory under which the shard databases are created
    # rest::      extra arguments forwarded verbatim to Persist.open_database
    # block::     called with a key, returns the identifier of its shard
    def initialize(directory, *rest, &block)
      @shard_function = block
      @params = rest
      @databases = {}
      @directory = directory
      @mutex = Mutex.new
    end

    # Returns the database responsible for +key+, opening it (at
    # "<directory>/shard-<id>") the first time the shard is requested.
    def database(key)
      shard = shard_function.call(key)
      # Interpolation instead of `'shard-' << ...` so no string literal is mutated.
      databases[shard] ||= Persist.open_database(File.join(directory, "shard-#{shard}"), *params)
    end

    # NOTE(review): +range+ is not defined in this class and nothing visible
    # here mixes it in, so this call raises NoMethodError unless it is
    # provided elsewhere -- confirm the intended cross-shard behaviour.
    def prefix(key)
      range(key, 1, key + MAX_CHAR, 1)
    end

    # NOTE(review): +select+ is not defined in this class either; with an
    # implicit receiver it presumably resolves to Kernel#select, which is
    # unlikely to be what was meant -- confirm before relying on it.
    def get_prefix(key)
      keys = prefix(key)
      select(:key => keys)
    end

    # True once #close has been called and no #read / #write followed.
    def closed?
      @closed
    end

    # True when the last mode switch was #write.
    def write?
      @writable
    end

    # Opposite of #write?.
    def read?
      ! write?
    end

    # Closes every shard opened so far and records the closed state.
    # Returns self.
    def close
      databases.each_value { |db| db.close }
      @closed = true
      self
    end

    # Calls +read+ on every shard opened so far and records the new state.
    # +force+ is accepted for signature compatibility and is currently
    # ignored: each shard is always asked to switch. Returns self.
    def read(force = false)
      databases.each_value { |db| db.read }
      @writable = false
      @closed = false
      self
    end

    # Calls +write+ on every shard opened so far and records the new state.
    # +force+ is accepted for signature compatibility and is currently
    # ignored: each shard is always asked to switch. Returns self.
    def write(force = true)
      databases.each_value { |db| db.write }
      @writable = true
      @closed = false
      self
    end

    # Yields every key/value pair of every opened shard, shard by shard.
    # Returns an Enumerator when no block is given.
    def each
      return enum_for(:each) unless block_given?
      # Iterate the databases themselves, not the [shard, database] pairs.
      databases.each_value do |db|
        db.each do |k, v|
          yield k, v
        end
      end
    end

    # Returns an array with the block's result for each key/value pair, or
    # with the [key, value] pairs themselves when no block is given.
    def collect
      res = []
      each do |key, value|
        res << (block_given? ? yield(key, value) : [key, value])
      end
      res
    end

    # Runs the block with the shards in write mode while holding the write
    # lock, then calls #read even if the block raises.
    # Returns the value of the block.
    def write_and_read
      with_write_lock do
        write if @closed || !write?
        begin
          yield
        ensure
          read
        end
      end
    end

    # Same as #write_and_read, but the shards are closed afterwards.
    # Returns the value of the block.
    def write_and_close
      with_write_lock do
        write if @closed || !write?
        begin
          yield
        ensure
          close
        end
      end
    end

    # Runs the block under the instance mutex, calling #read first if the
    # sharder is closed or in write mode, and closes the shards afterwards
    # even if the block raises. Returns the value of the block.
    def read_and_close
      @mutex.synchronize do
        read if @closed || !read?
        begin
          yield
        ensure
          close
        end
      end
    end

    # Stores every pair of +hash+ in the shard its key belongs to.
    def merge!(hash)
      hash.each do |key, values|
        self[key] = values
      end
    end

    # Keys of all the shards opened so far, as one flat array.
    def keys
      databases.values.flat_map { |db| db.keys }
    end

    # Stores +value+ under +key+ in the shard selected by the shard function.
    def []=(key, value)
      database(key)[key] = value
    end

    # Returns the value stored under +key+. The second parameter used to be
    # mandatory, which made `sharder[key]` raise ArgumentError; it is now
    # optional and ignored, kept only so existing two-argument calls still work.
    def [](key, value = nil)
      database(key)[key]
    end

    # Stores a [key, value] pair; a nil pair is ignored.
    def <<(p)
      return if p.nil?
      self[p.first] = p.last
    end

    private

    # Runs the block inside Misc.lock on this sharder's 'write' lock file
    # and, within that, inside the instance mutex.
    def with_write_lock
      lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
      Misc.lock(lock_filename) do
        @mutex.synchronize { yield }
      end
    end
  end
end
|
data/lib/rbbt/persist/tsv.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
module TSV
|
2
|
+
|
3
|
+
# Marker module: TSV.store_into treats a value extended with it as a
# collection and stores each of its elements separately.
module MultipleResult; end
|
4
|
+
|
2
5
|
def self.obj_stream(obj)
|
3
6
|
case obj
|
4
7
|
when (defined? Step and Step)
|
@@ -15,7 +18,7 @@ module TSV
|
|
15
18
|
def self.guess_max(obj)
|
16
19
|
begin
|
17
20
|
case obj
|
18
|
-
when Step
|
21
|
+
when (defined? Step and Step)
|
19
22
|
if obj.done?
|
20
23
|
CMD.cmd("wc -l '#{obj.path.find}'").read.to_i
|
21
24
|
else
|
@@ -27,8 +30,15 @@ module TSV
|
|
27
30
|
CMD.cmd("wc -l '#{obj.filename}'").read.to_i
|
28
31
|
when Path
|
29
32
|
CMD.cmd("wc -l '#{obj.find}'").read.to_i
|
33
|
+
when String
|
34
|
+
if File.exists? obj
|
35
|
+
CMD.cmd("wc -l '#{obj}'").read.to_i
|
36
|
+
else
|
37
|
+
nil
|
38
|
+
end
|
30
39
|
end
|
31
40
|
rescue Exception
|
41
|
+
Log.exception $!
|
32
42
|
nil
|
33
43
|
end
|
34
44
|
end
|
@@ -69,6 +79,7 @@ module TSV
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
end
|
82
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
72
83
|
join.call if join
|
73
84
|
end
|
74
85
|
|
@@ -92,6 +103,7 @@ module TSV
|
|
92
103
|
end
|
93
104
|
end
|
94
105
|
end
|
106
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
95
107
|
join.call if join
|
96
108
|
end
|
97
109
|
|
@@ -115,6 +127,7 @@ module TSV
|
|
115
127
|
end
|
116
128
|
end
|
117
129
|
end
|
130
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
118
131
|
join.call if join
|
119
132
|
end
|
120
133
|
|
@@ -132,14 +145,22 @@ module TSV
|
|
132
145
|
|
133
146
|
if callback
|
134
147
|
while line = io.gets
|
135
|
-
|
136
|
-
|
148
|
+
begin
|
149
|
+
callback.call yield line.strip
|
150
|
+
ensure
|
151
|
+
bar.tick if bar
|
152
|
+
end
|
137
153
|
end
|
138
154
|
else
|
139
155
|
while line = io.gets
|
140
|
-
|
156
|
+
begin
|
157
|
+
yield line.strip
|
158
|
+
ensure
|
159
|
+
bar.tick if bar
|
160
|
+
end
|
141
161
|
end
|
142
162
|
end
|
163
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
143
164
|
join.call if join
|
144
165
|
end
|
145
166
|
|
@@ -332,6 +353,12 @@ module TSV
|
|
332
353
|
end
|
333
354
|
|
334
355
|
def self.store_into(store, value)
|
356
|
+
if MultipleResult === value
|
357
|
+
value.each do |v|
|
358
|
+
store_into store, v
|
359
|
+
end
|
360
|
+
return
|
361
|
+
end
|
335
362
|
begin
|
336
363
|
case store
|
337
364
|
when Hash
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/persist/tsv'
|
4
|
+
|
5
|
+
class TestSharder < Test::Unit::TestCase
  # Fills a Sharder (sharded by the last character of each key) with one
  # million entries inside write_and_read and checks that all the keys can
  # be listed afterwards.
  def test_shard
    TmpFile.with_file do |dir|
      sharder = Persist::Sharder.new dir, true, :float_array, 'HDB' do |key|
        key[-1]
      end

      size = 1_000_000
      Misc.benchmark(2) do
        sharder.write_and_read do
          # The keys used to be accumulated in an array that was never
          # read; only the writes matter here.
          size.times do |v|
            sharder[v.to_s] = [v, v*2]
          end
        end

        assert_equal size, sharder.keys.length
      end
    end
  end
end
|
27
|
+
|
@@ -366,4 +366,14 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
366
366
|
end
|
367
367
|
assert_equal size, stream.read.split("\n").length
|
368
368
|
end
|
369
|
+
|
370
|
+
# Every element yields two results wrapped in TSV::MultipleResult, so the
# resulting stream is expected to hold twice as many lines as the input.
def test_store_multiple
  size = 1000
  array = Array.new(size) { |i| (i + 1).to_s }
  bar_options = {:max => size, :desc => "Array"}
  stream = TSV.traverse array, :bar => bar_options, :cpus => 5, :into => :stream do |e|
    sleep 0.01
    pair = [e, e + ".alt"]
    pair.extend TSV::MultipleResult
  end
  line_count = stream.read.split("\n").length
  assert_equal size*2, line_count
end
|
369
379
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.22
|
4
|
+
version: 5.13.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -136,6 +136,7 @@ files:
|
|
136
136
|
- lib/rbbt/persist/tsv/kyotocabinet.rb
|
137
137
|
- lib/rbbt/persist/tsv/leveldb.rb
|
138
138
|
- lib/rbbt/persist/tsv/lmdb.rb
|
139
|
+
- lib/rbbt/persist/tsv/sharder.rb
|
139
140
|
- lib/rbbt/persist/tsv/tokyocabinet.rb
|
140
141
|
- lib/rbbt/resource.rb
|
141
142
|
- lib/rbbt/resource/path.rb
|
@@ -278,6 +279,7 @@ files:
|
|
278
279
|
- test/rbbt/persist/tsv/test_kyotocabinet.rb
|
279
280
|
- test/rbbt/persist/tsv/test_leveldb.rb
|
280
281
|
- test/rbbt/persist/tsv/test_lmdb.rb
|
282
|
+
- test/rbbt/persist/tsv/test_sharder.rb
|
281
283
|
- test/rbbt/persist/tsv/test_tokyocabinet.rb
|
282
284
|
- test/rbbt/resource/test_path.rb
|
283
285
|
- test/rbbt/test_annotations.rb
|
@@ -395,6 +397,7 @@ test_files:
|
|
395
397
|
- test/rbbt/persist/test_tsv.rb
|
396
398
|
- test/rbbt/persist/tsv/test_lmdb.rb
|
397
399
|
- test/rbbt/persist/tsv/test_kyotocabinet.rb
|
400
|
+
- test/rbbt/persist/tsv/test_sharder.rb
|
398
401
|
- test/rbbt/persist/tsv/test_cdb.rb
|
399
402
|
- test/rbbt/persist/tsv/test_tokyocabinet.rb
|
400
403
|
- test/rbbt/persist/tsv/test_leveldb.rb
|