rbbt-util 5.13.36 → 5.13.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/fix_width_table.rb +40 -32
- data/lib/rbbt/monitor.rb +2 -1
- data/lib/rbbt/packed_index.rb +79 -0
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/persist/tsv.rb +17 -7
- data/lib/rbbt/persist/tsv/fix_width_table.rb +110 -0
- data/lib/rbbt/persist/tsv/sharder.rb +34 -14
- data/lib/rbbt/resource.rb +16 -5
- data/lib/rbbt/resource/rake.rb +15 -13
- data/lib/rbbt/tsv/accessor.rb +18 -6
- data/lib/rbbt/tsv/parallel/traverse.rb +3 -3
- data/lib/rbbt/tsv/parser.rb +4 -0
- data/lib/rbbt/tsv/serializers.rb +2 -2
- data/lib/rbbt/tsv/stream.rb +1 -1
- data/lib/rbbt/util/concurrency/processes/socket.rb +9 -9
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -9
- data/lib/rbbt/util/misc.rb +2 -0
- data/lib/rbbt/util/misc/concurrent_stream.rb +0 -1
- data/test/rbbt/persist/tsv/test_sharder.rb +40 -0
- data/test/rbbt/test_packed_index.rb +25 -0
- data/test/rbbt/tsv/parallel/test_traverse.rb +20 -20
- data/test/rbbt/util/concurrency/processes/test_socket.rb +34 -3
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 26df367cce98f5b14b985bd311d4b6594dcb62a9
|
|
4
|
+
data.tar.gz: 1931a495ac7eb8eb17dcfa0f154be8c9d61e8b7f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e71fee09c427fdf3faf326b75897dc6d070ec605bc94b3785d8cacf88f2bd3fa1fdd8d017e5e06d5e48cff48960cb8e7debb337b3a4eeb3361d2fd3735b2b1f3
|
|
7
|
+
data.tar.gz: 14a1b01824edd6bfa2a6ead19828454ea57b1c3846eb1f332a0b15faee9bf5019fdda715732ef57b1326ce3870d8c2f8d8c6c9f2faf6e70218ceea76f373436d
|
data/lib/rbbt/fix_width_table.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
class FixWidthTable
|
|
2
2
|
|
|
3
|
-
attr_accessor :filename, :file, :value_size, :record_size, :range, :size
|
|
3
|
+
attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
|
|
4
4
|
def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
|
|
5
5
|
@filename = filename
|
|
6
6
|
|
|
@@ -8,13 +8,14 @@ class FixWidthTable
|
|
|
8
8
|
Log.debug "FixWidthTable create: #{ filename }"
|
|
9
9
|
@value_size = value_size
|
|
10
10
|
@range = range
|
|
11
|
-
@record_size = @value_size + (@range ?
|
|
11
|
+
@record_size = @value_size + (@range ? 16 : 8)
|
|
12
12
|
|
|
13
13
|
if %w(memory stringio).include? filename.to_s.downcase
|
|
14
14
|
@filename = :memory
|
|
15
15
|
@file = StringIO.new
|
|
16
16
|
else
|
|
17
17
|
FileUtils.rm @filename if File.exists? @filename
|
|
18
|
+
FileUtils.mkdir_p File.dirname(@filename) unless File.exists? @filename
|
|
18
19
|
@file = File.open(@filename, 'wb')
|
|
19
20
|
end
|
|
20
21
|
|
|
@@ -24,17 +25,26 @@ class FixWidthTable
|
|
|
24
25
|
else
|
|
25
26
|
Log.debug "FixWidthTable up-to-date: #{ filename }"
|
|
26
27
|
if in_memory
|
|
27
|
-
@file = StringIO.new(Open.read(@filename, :mode => '
|
|
28
|
+
@file = StringIO.new(Open.read(@filename, :mode => 'r:ASCII-8BIT'), 'r')
|
|
28
29
|
else
|
|
29
|
-
@file = File.open(@filename, 'r')
|
|
30
|
+
@file = File.open(@filename, 'r:ASCII-8BIT')
|
|
30
31
|
end
|
|
31
32
|
@value_size = @file.read(4).unpack("L").first
|
|
32
33
|
@range = @file.read(1).unpack("C").first == 1
|
|
33
34
|
@record_size = @value_size + (@range ? 12 : 4)
|
|
34
35
|
@size = (File.size(@filename) - 5) / (@record_size)
|
|
35
36
|
end
|
|
37
|
+
|
|
38
|
+
@mask = "a#{value_size}"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def persistence_path
|
|
42
|
+
@filename
|
|
36
43
|
end
|
|
37
44
|
|
|
45
|
+
def persistence_path=(value)
|
|
46
|
+
@filename=value
|
|
47
|
+
end
|
|
38
48
|
|
|
39
49
|
CONNECTIONS = {} unless defined? CONNECTIONS
|
|
40
50
|
def self.get(filename, value_size = nil, range = nil, update = false)
|
|
@@ -50,28 +60,18 @@ class FixWidthTable
|
|
|
50
60
|
def format(pos, value)
|
|
51
61
|
padding = value_size - value.length
|
|
52
62
|
if range
|
|
53
|
-
(pos + [value + ("\0" * padding)]).pack("
|
|
63
|
+
(pos + [padding, value + ("\0" * padding)]).pack("llll#{mask}")
|
|
54
64
|
else
|
|
55
|
-
[pos, value + ("\0" * padding)].pack("
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def unformat(format)
|
|
60
|
-
if range
|
|
61
|
-
pos_start, pos_end, pos_overlap, value = format.unpack("llla#{value_size}")
|
|
62
|
-
[[pos_start, pos_end, pos_overlap], value.strip]
|
|
63
|
-
else
|
|
64
|
-
pos, value = format.unpack("la#{value_size}")
|
|
65
|
-
[pos, value.strip]
|
|
65
|
+
[pos, padding, value + ("\0" * padding)].pack("ll#{mask}")
|
|
66
66
|
end
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
def add(pos, value)
|
|
70
70
|
format = format(pos, value)
|
|
71
71
|
@file.write format
|
|
72
|
+
|
|
72
73
|
@size += 1
|
|
73
74
|
end
|
|
74
|
-
alias << add
|
|
75
75
|
|
|
76
76
|
def last_pos
|
|
77
77
|
pos(size - 1)
|
|
@@ -98,13 +98,15 @@ class FixWidthTable
|
|
|
98
98
|
def value(index)
|
|
99
99
|
return nil if index < 0 or index >= size
|
|
100
100
|
@file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
|
|
101
|
-
@file.read(
|
|
101
|
+
padding = @file.read(4).unpack("l").first+1
|
|
102
|
+
txt = @file.read(value_size)
|
|
103
|
+
txt.unpack(mask).first[0..-padding]
|
|
102
104
|
end
|
|
103
105
|
|
|
104
|
-
def read
|
|
106
|
+
def read(force = false)
|
|
105
107
|
return if @filename == :memory
|
|
106
108
|
@file.close unless @file.closed?
|
|
107
|
-
@file = File.open(
|
|
109
|
+
@file = File.open(filename, 'r:ASCII-8BIT')
|
|
108
110
|
end
|
|
109
111
|
|
|
110
112
|
def close
|
|
@@ -125,17 +127,20 @@ class FixWidthTable
|
|
|
125
127
|
end
|
|
126
128
|
end
|
|
127
129
|
|
|
130
|
+
def add_range_point(pos, value)
|
|
131
|
+
@latest ||= []
|
|
132
|
+
while @latest.any? and @latest[0] < pos[0]
|
|
133
|
+
@latest.shift
|
|
134
|
+
end
|
|
135
|
+
overlap = @latest.length
|
|
136
|
+
add pos + [overlap], value
|
|
137
|
+
@latest << pos[1]
|
|
138
|
+
end
|
|
139
|
+
|
|
128
140
|
def add_range(data)
|
|
129
|
-
latest = []
|
|
141
|
+
@latest = []
|
|
130
142
|
data.sort_by{|value, pos| pos[0] }.each do |value, pos|
|
|
131
|
-
|
|
132
|
-
latest.shift
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
overlap = latest.length
|
|
136
|
-
|
|
137
|
-
add pos + [overlap], value
|
|
138
|
-
latest << pos[1]
|
|
143
|
+
add_range_point(pos, value)
|
|
139
144
|
end
|
|
140
145
|
end
|
|
141
146
|
|
|
@@ -169,12 +174,14 @@ class FixWidthTable
|
|
|
169
174
|
end
|
|
170
175
|
|
|
171
176
|
def get_range(pos)
|
|
172
|
-
|
|
177
|
+
case pos
|
|
178
|
+
when Range
|
|
173
179
|
r_start = pos.begin
|
|
174
180
|
r_end = pos.end
|
|
181
|
+
when Array
|
|
182
|
+
r_start, r_end = pos
|
|
175
183
|
else
|
|
176
|
-
r_start = pos
|
|
177
|
-
r_end = pos.to_i
|
|
184
|
+
r_start, r_end = pos, pos
|
|
178
185
|
end
|
|
179
186
|
|
|
180
187
|
idx = closest(r_start)
|
|
@@ -243,6 +250,7 @@ class FixWidthTable
|
|
|
243
250
|
end
|
|
244
251
|
end
|
|
245
252
|
|
|
253
|
+
|
|
246
254
|
def values_at(*list)
|
|
247
255
|
list.collect{|pos|
|
|
248
256
|
self[pos]
|
data/lib/rbbt/monitor.rb
CHANGED
|
@@ -3,7 +3,8 @@ require 'rbbt'
|
|
|
3
3
|
module Rbbt
|
|
4
4
|
|
|
5
5
|
LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
|
|
6
|
-
Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all
|
|
6
|
+
Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
|
|
7
|
+
Rbbt.tmp.produce_locks.find_all
|
|
7
8
|
|
|
8
9
|
SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
|
|
9
10
|
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
class PackedIndex
|
|
2
|
+
attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
|
|
3
|
+
|
|
4
|
+
ELEMS = {
|
|
5
|
+
"I" => ["q", 8],
|
|
6
|
+
"i" => ["l", 4],
|
|
7
|
+
"f" => ["f", 4],
|
|
8
|
+
"F" => ["D", 8],
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
def self.process_mask(mask)
|
|
12
|
+
str = ""
|
|
13
|
+
size = 0
|
|
14
|
+
mask.each do |e|
|
|
15
|
+
if ELEMS.include? e
|
|
16
|
+
str << ELEMS[e][0]
|
|
17
|
+
size += ELEMS[e][1]
|
|
18
|
+
elsif e =~ /^(\d+)s$/
|
|
19
|
+
num = $1.to_i
|
|
20
|
+
str << "a" << num.to_s
|
|
21
|
+
size += num
|
|
22
|
+
else
|
|
23
|
+
e, num = e.split(":")
|
|
24
|
+
str << e
|
|
25
|
+
size = (num.nil? ? size + 1 : size + num.to_i)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
[str, size]
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def initialize(file, write = false, pattern = nil)
|
|
32
|
+
@file = file
|
|
33
|
+
if write
|
|
34
|
+
@stream = Open.open(file, :mode => 'wb')
|
|
35
|
+
@mask, @item_size = PackedIndex.process_mask pattern
|
|
36
|
+
header = [@mask.length, @item_size].pack("ll")
|
|
37
|
+
@stream.write(header)
|
|
38
|
+
@stream.write(mask)
|
|
39
|
+
@offset = @mask.length + 8
|
|
40
|
+
else
|
|
41
|
+
@stream = Open.open(file, :mode => 'rb')
|
|
42
|
+
header = @stream.read(8)
|
|
43
|
+
mask_length, @item_size = header.unpack("ll")
|
|
44
|
+
@mask = @stream.read(mask_length)
|
|
45
|
+
@offset = @mask.length + 8
|
|
46
|
+
end
|
|
47
|
+
@nil_string = "[NIL]" + "-" * (@item_size - 5)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def read
|
|
51
|
+
close
|
|
52
|
+
@stream = Open.open(file, :mode => 'rb')
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def <<(payload)
|
|
56
|
+
if payload.nil?
|
|
57
|
+
@stream.write nil_string
|
|
58
|
+
else
|
|
59
|
+
@stream.write payload.pack(mask)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def [](position)
|
|
64
|
+
@stream.seek(position * item_size + offset)
|
|
65
|
+
encoded = @stream.read(item_size)
|
|
66
|
+
return nil if encoded == nil_string
|
|
67
|
+
encoded.unpack mask
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def values_at(*positions)
|
|
71
|
+
positions.collect{|p|
|
|
72
|
+
self[p]
|
|
73
|
+
}
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def close
|
|
77
|
+
stream.close unless stream.closed?
|
|
78
|
+
end
|
|
79
|
+
end
|
data/lib/rbbt/persist.rb
CHANGED
|
@@ -204,7 +204,7 @@ module Persist
|
|
|
204
204
|
ConcurrentStream.setup(out, :threads => saver_thread, :filename => path)
|
|
205
205
|
out.callback = callback
|
|
206
206
|
out.abort_callback = abort_callback
|
|
207
|
-
out.lockfile = stream.lockfile
|
|
207
|
+
out.lockfile = stream.lockfile if stream.respond_to? :lockfile
|
|
208
208
|
out
|
|
209
209
|
end
|
|
210
210
|
|
data/lib/rbbt/persist/tsv.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
require 'rbbt/persist/tsv/adapter'
|
|
2
2
|
|
|
3
|
+
require 'rbbt/persist/tsv/fix_width_table'
|
|
4
|
+
|
|
3
5
|
begin
|
|
4
6
|
require 'rbbt/persist/tsv/tokyocabinet'
|
|
5
7
|
rescue Exception
|
|
@@ -46,7 +48,7 @@ module Persist
|
|
|
46
48
|
end || source.object_id.to_s
|
|
47
49
|
end
|
|
48
50
|
|
|
49
|
-
def self.open_database(path, write, serializer = nil, type = "HDB")
|
|
51
|
+
def self.open_database(path, write, serializer = nil, type = "HDB", options = {})
|
|
50
52
|
case type
|
|
51
53
|
when "LevelDB"
|
|
52
54
|
Persist.open_leveldb(path, write, serializer)
|
|
@@ -56,6 +58,13 @@ module Persist
|
|
|
56
58
|
Persist.open_lmdb(path, write, serializer)
|
|
57
59
|
when 'kch', 'kct'
|
|
58
60
|
Persist.open_kyotocabinet(path, write, serializer, type)
|
|
61
|
+
when 'fwt'
|
|
62
|
+
value_size, range, update, in_memory, pos_function = Misc.process_options options.dup, :value_size, :range, :update, :in_memory, :pos_function
|
|
63
|
+
if pos_function
|
|
64
|
+
Persist.open_fwt(path, value_size, range, serializer, update, in_memory, &pos_function)
|
|
65
|
+
else
|
|
66
|
+
Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
|
|
67
|
+
end
|
|
59
68
|
else
|
|
60
69
|
Persist.open_tokyocabinet(path, write, serializer, type)
|
|
61
70
|
end
|
|
@@ -86,9 +95,9 @@ module Persist
|
|
|
86
95
|
if is_persisted?(path) and not persist_options[:update]
|
|
87
96
|
Log.debug "TSV persistence up-to-date: #{ path }"
|
|
88
97
|
if persist_options[:shard_function]
|
|
89
|
-
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
|
98
|
+
return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
|
|
90
99
|
else
|
|
91
|
-
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
|
100
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
|
|
92
101
|
end
|
|
93
102
|
end
|
|
94
103
|
|
|
@@ -98,9 +107,9 @@ module Persist
|
|
|
98
107
|
Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
|
|
99
108
|
|
|
100
109
|
if persist_options[:shard_function]
|
|
101
|
-
return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
|
|
110
|
+
return open_sharder(path, false, nil, persist_options[:engine], persist_options, &persist_options[:shard_function])
|
|
102
111
|
else
|
|
103
|
-
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
|
|
112
|
+
return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB, persist_options)
|
|
104
113
|
end
|
|
105
114
|
end
|
|
106
115
|
|
|
@@ -111,9 +120,9 @@ module Persist
|
|
|
111
120
|
tmp_path = path + '.persist'
|
|
112
121
|
|
|
113
122
|
data = if persist_options[:shard_function]
|
|
114
|
-
open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
|
|
123
|
+
open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], persist_options, &persist_options[:shard_function])
|
|
115
124
|
else
|
|
116
|
-
open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
|
|
125
|
+
open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB, persist_options)
|
|
117
126
|
end
|
|
118
127
|
|
|
119
128
|
if TSV === data and data.serializer.nil?
|
|
@@ -127,6 +136,7 @@ module Persist
|
|
|
127
136
|
FileUtils.mv data.persistence_path, path if File.exists? data.persistence_path and not File.exists? path
|
|
128
137
|
tsv = CONNECTIONS[path] = CONNECTIONS.delete tmp_path
|
|
129
138
|
tsv.persistence_path = path
|
|
139
|
+
|
|
130
140
|
tsv.fix_io if tsv.respond_to? :fix_io
|
|
131
141
|
|
|
132
142
|
data
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
require 'rbbt/fix_width_table'
|
|
2
|
+
|
|
3
|
+
module Persist
|
|
4
|
+
|
|
5
|
+
module FWTAdapter
|
|
6
|
+
include Persist::TSVAdapter
|
|
7
|
+
|
|
8
|
+
attr_accessor :pos_function
|
|
9
|
+
|
|
10
|
+
def self.open(path, value_size, range = false, update = false, in_memory = false, &pos_function)
|
|
11
|
+
db = CONNECTIONS[path] ||= FixWidthTable.new(path, value_size, range, update, in_memory)
|
|
12
|
+
db.extend Persist::FWTAdapter
|
|
13
|
+
db.persistence_path = path
|
|
14
|
+
db.pos_function = pos_function
|
|
15
|
+
db
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def persistence_path=(value)
|
|
19
|
+
@persistence_path = value
|
|
20
|
+
@filename = value
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def metadata_file
|
|
24
|
+
@metadata_file ||= self.persistence_path + '.metadata'
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def metadata
|
|
28
|
+
return {} unless File.exists? metadata_file
|
|
29
|
+
Open.open(metadata_file, :mode => "rb") do |f|
|
|
30
|
+
Marshal.load(f)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def set_metadata(k,v)
|
|
35
|
+
metadata = self.metadata
|
|
36
|
+
metadata[k] = v
|
|
37
|
+
Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def [](key)
|
|
41
|
+
if TSV::ENTRY_KEYS.include? key
|
|
42
|
+
metadata[key]
|
|
43
|
+
else
|
|
44
|
+
key = pos_function.call(key) if pos_function
|
|
45
|
+
res = super(key)
|
|
46
|
+
res.extend MultipleResult
|
|
47
|
+
res
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def []=(key, value)
|
|
52
|
+
if TSV::ENTRY_KEYS.include? key
|
|
53
|
+
set_metadata(key, value)
|
|
54
|
+
else
|
|
55
|
+
if range
|
|
56
|
+
add_range_point key, value
|
|
57
|
+
else
|
|
58
|
+
add key, value
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def add(key, value)
|
|
64
|
+
key = pos_function.call(key) if pos_function and not (range and Array === key)
|
|
65
|
+
super(key, value)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def add_range_point(key, value)
|
|
69
|
+
key = pos_function.call(key) if pos_function
|
|
70
|
+
super(key, value)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def <<(key, value)
|
|
74
|
+
self.send(:[]=, *i)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def include?(i)
|
|
78
|
+
return true if Fixnum === i and i < pos(@size)
|
|
79
|
+
return true if metadata.include? i
|
|
80
|
+
false
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def size
|
|
84
|
+
@size #+ metadata.keys.length
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def each
|
|
88
|
+
@size.times do |i|
|
|
89
|
+
yield i, value(i)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def keys
|
|
94
|
+
[]
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def self.open_fwt(path, value_size, range = false, serializer = nil, update = false, in_memory = false, &pos_function)
|
|
99
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
|
100
|
+
|
|
101
|
+
database = Persist::FWTAdapter.open(path, value_size, range, update, in_memory, &pos_function)
|
|
102
|
+
|
|
103
|
+
unless serializer == :clean
|
|
104
|
+
TSV.setup database
|
|
105
|
+
database.serializer = serializer || database.serializer
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
database
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
module Persist
|
|
2
2
|
module SharderAdapter
|
|
3
|
-
def self.open(path, write, type=nil, &block)
|
|
3
|
+
def self.open(path, write, type=nil, options = {}, &block)
|
|
4
4
|
|
|
5
|
-
database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
|
|
5
|
+
database = CONNECTIONS[path] ||= Sharder.new(path, write, type, options, &block)
|
|
6
6
|
|
|
7
7
|
database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
|
|
8
8
|
|
|
9
9
|
database
|
|
10
10
|
end
|
|
11
|
-
|
|
12
11
|
end
|
|
13
12
|
|
|
14
13
|
class Sharder
|
|
15
|
-
attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
|
|
14
|
+
attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type, :options
|
|
16
15
|
|
|
17
|
-
def initialize(persistence_path, write = false, db_type=nil, &block)
|
|
16
|
+
def initialize(persistence_path, write = false, db_type=nil, options = {}, &block)
|
|
18
17
|
@shard_function = block
|
|
18
|
+
@options = options
|
|
19
19
|
@persistence_path = Path.setup(persistence_path)
|
|
20
20
|
@mutex = Mutex.new
|
|
21
21
|
@writable = write
|
|
@@ -32,6 +32,7 @@ module Persist
|
|
|
32
32
|
|
|
33
33
|
def persistence_path=(path)
|
|
34
34
|
@persistence_path = path
|
|
35
|
+
databases.values.each{|db| db.persistence_path = File.join(path, File.basename(db.persistence_path))}
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
def databases
|
|
@@ -39,21 +40,32 @@ module Persist
|
|
|
39
40
|
hash = {}
|
|
40
41
|
@persistence_path.glob('shard-*').each do |f|
|
|
41
42
|
shard = File.basename(f).match(/shard-(.*)/)[1]
|
|
42
|
-
|
|
43
|
+
if shard == 'metadata'
|
|
44
|
+
hash[shard] = Persist.open_database(f, false, :clean, "HDB", @options)
|
|
45
|
+
else
|
|
46
|
+
hash[shard] = Persist.open_database(f, false, :clean, db_type, @options)
|
|
47
|
+
end
|
|
43
48
|
end
|
|
44
49
|
hash
|
|
45
50
|
end
|
|
46
51
|
end
|
|
47
52
|
|
|
48
53
|
def database(key)
|
|
49
|
-
shard = key =~ /__tsv_/ ? "
|
|
54
|
+
shard = key =~ /__tsv_/ ? "metadata" : shard_function.call(key)
|
|
50
55
|
if databases.include? shard
|
|
51
56
|
databases[shard]
|
|
52
57
|
else
|
|
53
|
-
|
|
58
|
+
if shard == 'metadata'
|
|
59
|
+
database ||= begin
|
|
60
|
+
path = File.join(persistence_path, 'shard-' << shard.to_s)
|
|
61
|
+
(writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, "HDB", @options) : nil
|
|
62
|
+
end
|
|
63
|
+
else
|
|
64
|
+
database ||= begin
|
|
54
65
|
path = File.join(persistence_path, 'shard-' << shard.to_s)
|
|
55
|
-
(writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
|
|
66
|
+
(writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type, @options) : nil
|
|
56
67
|
end
|
|
68
|
+
end
|
|
57
69
|
if database
|
|
58
70
|
databases[shard] = database
|
|
59
71
|
else
|
|
@@ -84,6 +96,7 @@ module Persist
|
|
|
84
96
|
end
|
|
85
97
|
|
|
86
98
|
def read(force = false)
|
|
99
|
+
raise "SIOT"
|
|
87
100
|
return if not write? and not closed and not force
|
|
88
101
|
self.close
|
|
89
102
|
databases.each{|d| d.read }
|
|
@@ -204,23 +217,30 @@ module Persist
|
|
|
204
217
|
databases.values.each{|database| database.write }
|
|
205
218
|
end
|
|
206
219
|
|
|
207
|
-
def read
|
|
208
|
-
databases.values.each{|database| database.read }
|
|
220
|
+
def read(force = false)
|
|
221
|
+
databases.values.each{|database| database.read(force) }
|
|
209
222
|
end
|
|
210
223
|
|
|
211
224
|
def close
|
|
212
225
|
databases.values.each{|database| database.close }
|
|
213
226
|
end
|
|
227
|
+
|
|
228
|
+
def size
|
|
229
|
+
databases.inject(0){|acc,i|
|
|
230
|
+
shard, db = i;
|
|
231
|
+
acc += db.size
|
|
232
|
+
}
|
|
233
|
+
end
|
|
214
234
|
end
|
|
215
235
|
|
|
216
|
-
def self.open_sharder(path, write, serializer = nil,
|
|
236
|
+
def self.open_sharder(path, write, serializer = nil, type = TokyoCabinet::HDB, options, &shard_function)
|
|
217
237
|
write = true unless File.exists? path
|
|
218
238
|
|
|
219
239
|
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
|
220
240
|
|
|
221
|
-
database = Persist::SharderAdapter.open(path, write,
|
|
241
|
+
database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
|
|
222
242
|
|
|
223
|
-
unless serializer == :clean
|
|
243
|
+
unless serializer == :clean #or type.to_s == 'fwt'
|
|
224
244
|
TSV.setup database
|
|
225
245
|
database.serializer = serializer if serializer
|
|
226
246
|
end
|
data/lib/rbbt/resource.rb
CHANGED
|
@@ -6,6 +6,15 @@ require 'set'
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
module Resource
|
|
9
|
+
|
|
10
|
+
class << self
|
|
11
|
+
attr_accessor :lock_dir
|
|
12
|
+
|
|
13
|
+
def lock_dir
|
|
14
|
+
@lock_dir ||= Rbbt.tmp.produce_locks.find
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
9
18
|
def self.remote_servers
|
|
10
19
|
@remote_servers = Rbbt.etc.file_servers.exists? ? Rbbt.etc.file_servers.yaml : {}
|
|
11
20
|
end
|
|
@@ -65,7 +74,8 @@ module Resource
|
|
|
65
74
|
begin
|
|
66
75
|
@server_missing_resource_cache ||= Set.new
|
|
67
76
|
raise "Resource Not Found" if @server_missing_resource_cache.include? url
|
|
68
|
-
|
|
77
|
+
lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
|
|
78
|
+
Misc.lock lock_filename do
|
|
69
79
|
Net::HTTP.get_response URI(url) do |response|
|
|
70
80
|
case response
|
|
71
81
|
when Net::HTTPSuccess, Net::HTTPOK
|
|
@@ -115,15 +125,16 @@ module Resource
|
|
|
115
125
|
final_path = path.respond_to?(:find) ? (force ? path.find(:user) : path.find) : path
|
|
116
126
|
if not File.exists? final_path or force
|
|
117
127
|
Log.medium "Producing: #{ final_path }"
|
|
118
|
-
|
|
128
|
+
lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
|
|
129
|
+
Misc.lock lock_filename do
|
|
119
130
|
if not File.exists? final_path or force
|
|
120
131
|
(remote_server and get_from_server(path, final_path)) or
|
|
121
132
|
begin
|
|
122
133
|
case type
|
|
123
134
|
when :string
|
|
124
|
-
|
|
135
|
+
Misc.sensiblewrite(final_path, content)
|
|
125
136
|
when :url
|
|
126
|
-
|
|
137
|
+
Misc.sensiblewrite(final_path, Open.open(content))
|
|
127
138
|
when :proc
|
|
128
139
|
data = case content.arity
|
|
129
140
|
when 0
|
|
@@ -131,7 +142,7 @@ module Resource
|
|
|
131
142
|
when 1
|
|
132
143
|
content.call final_path
|
|
133
144
|
end
|
|
134
|
-
|
|
145
|
+
Misc.sensiblewrite(final_path, data) unless data.nil?
|
|
135
146
|
when :rake
|
|
136
147
|
run_rake(path, content, rake_dir)
|
|
137
148
|
when :install
|
data/lib/rbbt/resource/rake.rb
CHANGED
|
@@ -38,20 +38,22 @@ module Rake
|
|
|
38
38
|
raise TaskNotFound if Rake::Task[task].nil?
|
|
39
39
|
|
|
40
40
|
t = nil
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
41
|
+
pid = Process.fork{
|
|
42
|
+
Misc.pre_fork
|
|
43
|
+
begin
|
|
44
|
+
Misc.in_dir(dir) do
|
|
45
|
+
Rake::Task[task].invoke
|
|
46
|
+
|
|
47
|
+
Rake::Task.clear
|
|
48
|
+
Rake::FileTask.clear_files
|
|
49
|
+
end
|
|
50
|
+
rescue
|
|
51
|
+
Log.error "Error in rake: #{$!.message}"
|
|
52
|
+
raise $!
|
|
48
53
|
end
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
end
|
|
53
|
-
#}
|
|
54
|
-
#Process.wait(pid)
|
|
54
|
+
}
|
|
55
|
+
Process.waitpid(pid)
|
|
56
|
+
raise "Rake failed" unless $?.success?
|
|
55
57
|
|
|
56
58
|
end
|
|
57
59
|
end
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
|
@@ -94,6 +94,7 @@ module TSV
|
|
|
94
94
|
begin
|
|
95
95
|
super
|
|
96
96
|
rescue Exception
|
|
97
|
+
Log.exception $!
|
|
97
98
|
@writable = false
|
|
98
99
|
self
|
|
99
100
|
end
|
|
@@ -197,12 +198,7 @@ module TSV
|
|
|
197
198
|
|
|
198
199
|
#{{{ GETTERS AND SETTERS
|
|
199
200
|
|
|
200
|
-
|
|
201
|
-
def [](key, clean = false)
|
|
202
|
-
value = super(key)
|
|
203
|
-
return value if clean or value.nil?
|
|
204
|
-
@serializer_module ||= self.serializer_module
|
|
205
|
-
|
|
201
|
+
def prepare_value(key, value)
|
|
206
202
|
value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
|
|
207
203
|
|
|
208
204
|
return value if @unnamed or fields.nil?
|
|
@@ -218,6 +214,20 @@ module TSV
|
|
|
218
214
|
value
|
|
219
215
|
end
|
|
220
216
|
|
|
217
|
+
def [](key, clean = false)
|
|
218
|
+
value = super(key)
|
|
219
|
+
return value if clean or value.nil?
|
|
220
|
+
@serializer_module ||= self.serializer_module
|
|
221
|
+
|
|
222
|
+
if MultipleResult === value
|
|
223
|
+
res = value.collect{|v| prepare_value key, v }
|
|
224
|
+
res.extend MultipleResult
|
|
225
|
+
res
|
|
226
|
+
else
|
|
227
|
+
prepare_value key, value
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
|
|
221
231
|
def []=(key, value, clean = false)
|
|
222
232
|
return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
|
|
223
233
|
super(key, @serializer_module.dump(value))
|
|
@@ -634,6 +644,7 @@ Example:
|
|
|
634
644
|
if merge
|
|
635
645
|
self.through do |key,values|
|
|
636
646
|
field_values = values.delete_at field_pos
|
|
647
|
+
next if field_values.nil?
|
|
637
648
|
zipped = values.zip_fields
|
|
638
649
|
field_values.zip(zipped).each do |field_value,rest|
|
|
639
650
|
k = [key,field_value]*":"
|
|
@@ -648,6 +659,7 @@ Example:
|
|
|
648
659
|
else
|
|
649
660
|
self.through do |key,values|
|
|
650
661
|
field_values = values.delete_at field_pos
|
|
662
|
+
next if field_values.nil?
|
|
651
663
|
zipped = Misc.zip_fields(values)
|
|
652
664
|
field_values.zip(zipped).each do |field_value,rest|
|
|
653
665
|
k = [key,field_value]*":"
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
module TSV
|
|
2
2
|
|
|
3
|
-
module MultipleResult; end
|
|
4
|
-
|
|
5
3
|
def self.obj_stream(obj)
|
|
6
4
|
case obj
|
|
7
5
|
when nil
|
|
@@ -510,13 +508,15 @@ module TSV
|
|
|
510
508
|
|
|
511
509
|
bar = Misc.process_options options, :bar
|
|
512
510
|
bar ||= Misc.process_options options, :progress
|
|
513
|
-
max = guess_max(obj)
|
|
514
511
|
options[:bar] = case bar
|
|
515
512
|
when String
|
|
513
|
+
max = guess_max(obj)
|
|
516
514
|
Log::ProgressBar.new_bar(max, {:desc => bar})
|
|
517
515
|
when TrueClass
|
|
516
|
+
max = guess_max(obj)
|
|
518
517
|
Log::ProgressBar.new_bar(max, nil)
|
|
519
518
|
when Fixnum
|
|
519
|
+
max = guess_max(obj)
|
|
520
520
|
Log::ProgressBar.new_bar(bar)
|
|
521
521
|
when Hash
|
|
522
522
|
max = Misc.process_options(bar, :max) || max
|
data/lib/rbbt/tsv/parser.rb
CHANGED
|
@@ -554,6 +554,10 @@ module TSV
|
|
|
554
554
|
self
|
|
555
555
|
end
|
|
556
556
|
|
|
557
|
+
def identify_field(field)
|
|
558
|
+
TSV.identify_field(key_field, fields, field)
|
|
559
|
+
end
|
|
560
|
+
|
|
557
561
|
def self.traverse(stream, options = {}, &block)
|
|
558
562
|
parser = Parser.new(stream, options)
|
|
559
563
|
parser.traverse(options, &block)
|
data/lib/rbbt/tsv/serializers.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
module TSV
|
|
2
|
+
|
|
2
3
|
class CleanSerializer
|
|
3
4
|
def self.dump(o); o end
|
|
4
5
|
def self.load(o); o end
|
|
@@ -26,7 +27,7 @@ module TSV
|
|
|
26
27
|
|
|
27
28
|
class StringSerializer
|
|
28
29
|
def self.dump(str); str.to_s; end
|
|
29
|
-
def self.load(str); str; end
|
|
30
|
+
def self.load(str); str.dup; end
|
|
30
31
|
end
|
|
31
32
|
|
|
32
33
|
class StringArraySerializer
|
|
@@ -61,7 +62,6 @@ module TSV
|
|
|
61
62
|
end
|
|
62
63
|
end
|
|
63
64
|
|
|
64
|
-
|
|
65
65
|
class TSVSerializer
|
|
66
66
|
def self.dump(tsv)
|
|
67
67
|
tsv.to_s
|
data/lib/rbbt/tsv/stream.rb
CHANGED
|
@@ -69,7 +69,6 @@ module TSV
|
|
|
69
69
|
out = Misc.open_pipe do |sin|
|
|
70
70
|
num_streams = streams.length
|
|
71
71
|
|
|
72
|
-
|
|
73
72
|
streams = streams.collect do |stream|
|
|
74
73
|
sorted = Misc.sort_stream(stream)
|
|
75
74
|
stream.annotate sorted if stream.respond_to? :annotate
|
|
@@ -150,6 +149,7 @@ module TSV
|
|
|
150
149
|
|
|
151
150
|
sin.puts [min, str*sep] * sep
|
|
152
151
|
end
|
|
152
|
+
|
|
153
153
|
streams.each do |stream|
|
|
154
154
|
stream.join if stream.respond_to? :join
|
|
155
155
|
end
|
|
@@ -3,12 +3,13 @@ require 'rbbt/util/semaphore'
|
|
|
3
3
|
class RbbtProcessQueue
|
|
4
4
|
class RbbtProcessSocket
|
|
5
5
|
|
|
6
|
-
Serializer = Marshal
|
|
7
|
-
|
|
8
6
|
attr_accessor :sread, :swrite, :write_sem, :read_sem
|
|
9
|
-
def initialize
|
|
7
|
+
def initialize(serializer = nil)
|
|
10
8
|
@sread, @swrite = Misc.pipe
|
|
11
9
|
|
|
10
|
+
@serializer = serializer || Marshal
|
|
11
|
+
|
|
12
|
+
|
|
12
13
|
key = "/" << rand(100000000).to_s;
|
|
13
14
|
@write_sem = key + '.in'
|
|
14
15
|
@read_sem = key + '.out'
|
|
@@ -30,16 +31,15 @@ class RbbtProcessQueue
|
|
|
30
31
|
case obj
|
|
31
32
|
when String
|
|
32
33
|
payload = obj
|
|
33
|
-
size_head = [payload.bytesize,"
|
|
34
|
+
size_head = [payload.bytesize,"C"].pack 'La'
|
|
34
35
|
str = size_head << payload
|
|
35
36
|
else
|
|
36
|
-
payload =
|
|
37
|
-
size_head = [payload.bytesize,"
|
|
37
|
+
payload = @serializer.dump(obj)
|
|
38
|
+
size_head = [payload.bytesize,"S"].pack 'La'
|
|
38
39
|
str = size_head << payload
|
|
39
40
|
end
|
|
40
41
|
|
|
41
42
|
write_length = str.length
|
|
42
|
-
#IO.select(nil, [stream])
|
|
43
43
|
wrote = stream.write(str)
|
|
44
44
|
while wrote < write_length
|
|
45
45
|
wrote += stream.write(str[wrote..-1])
|
|
@@ -54,9 +54,9 @@ class RbbtProcessQueue
|
|
|
54
54
|
begin
|
|
55
55
|
payload = Misc.read_stream stream, size
|
|
56
56
|
case type
|
|
57
|
-
when "M"
|
|
58
|
-
Serializer.load(payload)
|
|
59
57
|
when "S"
|
|
58
|
+
@serializer.load(payload)
|
|
59
|
+
when "C"
|
|
60
60
|
payload
|
|
61
61
|
end
|
|
62
62
|
rescue TryAgain
|
|
@@ -7,15 +7,6 @@ class RbbtProcessQueue
|
|
|
7
7
|
|
|
8
8
|
@pid = Process.fork do
|
|
9
9
|
begin
|
|
10
|
-
|
|
11
|
-
#Persist::CONNECTIONS.values.each do |db| db.close if db.write? end
|
|
12
|
-
#ObjectSpace.each_object(Mutex) do |m|
|
|
13
|
-
# begin
|
|
14
|
-
# m.unlock
|
|
15
|
-
# rescue ThreadError
|
|
16
|
-
# end if m.locked?
|
|
17
|
-
#end
|
|
18
|
-
|
|
19
10
|
Misc.pre_fork
|
|
20
11
|
|
|
21
12
|
@cleanup.call if @cleanup
|
data/lib/rbbt/util/misc.rb
CHANGED
|
@@ -35,5 +35,45 @@ class TestSharder < Test::Unit::TestCase
|
|
|
35
35
|
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
|
+
|
|
39
|
+
def test_shard_fwt
|
|
40
|
+
TmpFile.with_file do |dir|
|
|
41
|
+
shard_function = Proc.new do |key|
|
|
42
|
+
key[0..(key.index(":")-1)]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
pos_function = Proc.new do |key|
|
|
46
|
+
key.split(":").last.to_i
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
size = 10
|
|
50
|
+
sharder = Persist.persist_tsv(nil, "ShardTest", {}, :update => true, :range => false, :value_size => 64, :engine => 'fwt', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :float) do |db|
|
|
51
|
+
size.times do |v|
|
|
52
|
+
v = v + 1
|
|
53
|
+
chr = "chr" << (v % 5).to_s
|
|
54
|
+
key = chr + ":" << v.to_s
|
|
55
|
+
db << [key, v*2]
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
sharder.read
|
|
59
|
+
|
|
60
|
+
assert_equal dir, sharder.persistence_path
|
|
61
|
+
assert_equal size, sharder.size
|
|
62
|
+
|
|
63
|
+
assert_equal [4.0], sharder["chr2:2"]
|
|
64
|
+
|
|
65
|
+
count = 0
|
|
66
|
+
sharder.through do |k,v|
|
|
67
|
+
count += 1
|
|
68
|
+
end
|
|
69
|
+
assert_equal count, size
|
|
70
|
+
|
|
71
|
+
sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
|
|
72
|
+
|
|
73
|
+
assert_equal [4.0], sharder["chr2:2"]
|
|
74
|
+
|
|
75
|
+
assert_equal size, sharder.size
|
|
76
|
+
end
|
|
77
|
+
end
|
|
38
78
|
end
|
|
39
79
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
|
|
2
|
+
require 'rbbt/packed_index'
|
|
3
|
+
|
|
4
|
+
class TestPackedIndex < Test::Unit::TestCase
|
|
5
|
+
def test_index
|
|
6
|
+
|
|
7
|
+
TmpFile.with_file do |tmpfile|
|
|
8
|
+
pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
|
|
9
|
+
100.times do |i|
|
|
10
|
+
pi << [i, i+2, i.to_s * 10, rand, rand, rand, rand, rand]
|
|
11
|
+
end
|
|
12
|
+
pi << nil
|
|
13
|
+
pi << nil
|
|
14
|
+
pi.close
|
|
15
|
+
pi = PackedIndex.new tmpfile, false
|
|
16
|
+
100.times do |i|
|
|
17
|
+
assert_equal i, pi[i][0]
|
|
18
|
+
assert_equal i+2, pi[i][1]
|
|
19
|
+
end
|
|
20
|
+
assert_equal nil, pi[100]
|
|
21
|
+
assert_equal nil, pi[101]
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
@@ -6,7 +6,7 @@ class StopException < StandardError; end
|
|
|
6
6
|
|
|
7
7
|
class TestTSVParallelThrough < Test::Unit::TestCase
|
|
8
8
|
|
|
9
|
-
def
|
|
9
|
+
def test_traverse_tsv
|
|
10
10
|
require 'rbbt/sources/organism'
|
|
11
11
|
|
|
12
12
|
head = 100
|
|
@@ -25,7 +25,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
25
25
|
assert_equal head, res.keys.compact.sort.length
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
-
def
|
|
28
|
+
def test_traverse_tsv_cpus
|
|
29
29
|
require 'rbbt/sources/organism'
|
|
30
30
|
|
|
31
31
|
head = 100
|
|
@@ -47,7 +47,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
47
47
|
assert res.values.compact.flatten.uniq.length > 0
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
-
def
|
|
50
|
+
def test_traverse_stream
|
|
51
51
|
require 'rbbt/sources/organism'
|
|
52
52
|
|
|
53
53
|
head = 1000
|
|
@@ -61,7 +61,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
61
61
|
assert_equal head, res.keys.compact.sort.length
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
-
def
|
|
64
|
+
def test_traverse_stream_cpus
|
|
65
65
|
require 'rbbt/sources/organism'
|
|
66
66
|
|
|
67
67
|
head = 1000
|
|
@@ -75,7 +75,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
75
75
|
assert_equal head, res.keys.compact.sort.length
|
|
76
76
|
end
|
|
77
77
|
|
|
78
|
-
def
|
|
78
|
+
def test_traverse_stream_keys
|
|
79
79
|
require 'rbbt/sources/organism'
|
|
80
80
|
|
|
81
81
|
head = 1000
|
|
@@ -99,7 +99,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
99
99
|
assert_equal res.sort, Organism.identifiers("Hsa").tsv(:head => head).keys.sort
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
-
def
|
|
102
|
+
def test_traverse_array
|
|
103
103
|
require 'rbbt/sources/organism'
|
|
104
104
|
|
|
105
105
|
array = []
|
|
@@ -121,7 +121,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
121
121
|
assert_equal array, res
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
-
def
|
|
124
|
+
def test_traverse_array_threads
|
|
125
125
|
require 'rbbt/sources/organism'
|
|
126
126
|
|
|
127
127
|
array = []
|
|
@@ -142,7 +142,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
142
142
|
assert_equal array.sort, res.sort
|
|
143
143
|
end
|
|
144
144
|
|
|
145
|
-
def
|
|
145
|
+
def test_traverse_array_cpus
|
|
146
146
|
require 'rbbt/sources/organism'
|
|
147
147
|
|
|
148
148
|
array = []
|
|
@@ -157,7 +157,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
157
157
|
assert_equal array.sort, res.sort
|
|
158
158
|
end
|
|
159
159
|
|
|
160
|
-
def
|
|
160
|
+
def test_traverse_benchmark
|
|
161
161
|
require 'rbbt/sources/organism'
|
|
162
162
|
|
|
163
163
|
head = 2_000
|
|
@@ -179,7 +179,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
179
179
|
end
|
|
180
180
|
end
|
|
181
181
|
|
|
182
|
-
def
|
|
182
|
+
def test_traverse_into_dumper
|
|
183
183
|
require 'rbbt/sources/organism'
|
|
184
184
|
|
|
185
185
|
head = 2_000
|
|
@@ -197,7 +197,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
197
197
|
assert_equal head, res.size
|
|
198
198
|
end
|
|
199
199
|
|
|
200
|
-
def
|
|
200
|
+
def test_traverse_into_dumper_threads
|
|
201
201
|
require 'rbbt/sources/organism'
|
|
202
202
|
|
|
203
203
|
head = 2_000
|
|
@@ -217,7 +217,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
217
217
|
assert_equal head, res.size
|
|
218
218
|
end
|
|
219
219
|
|
|
220
|
-
def
|
|
220
|
+
def test_traverse_into_dumper_cpus
|
|
221
221
|
require 'rbbt/sources/organism'
|
|
222
222
|
|
|
223
223
|
head = 2_000
|
|
@@ -238,7 +238,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
238
238
|
|
|
239
239
|
#{{{ TRAVERSE DUMPER
|
|
240
240
|
|
|
241
|
-
def
|
|
241
|
+
def test_traverse_dumper
|
|
242
242
|
require 'rbbt/sources/organism'
|
|
243
243
|
|
|
244
244
|
head = 2_000
|
|
@@ -259,7 +259,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
259
259
|
assert_equal head, res.size
|
|
260
260
|
end
|
|
261
261
|
|
|
262
|
-
def
|
|
262
|
+
def test_traverse_dumper_threads
|
|
263
263
|
require 'rbbt/sources/organism'
|
|
264
264
|
|
|
265
265
|
head = 2_000
|
|
@@ -283,7 +283,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
283
283
|
assert_equal head, res.size
|
|
284
284
|
end
|
|
285
285
|
|
|
286
|
-
def
|
|
286
|
+
def test_traverse_dumper_cpus
|
|
287
287
|
require 'rbbt/sources/organism'
|
|
288
288
|
|
|
289
289
|
head = 10_000
|
|
@@ -305,7 +305,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
305
305
|
assert_equal head, res.size
|
|
306
306
|
end
|
|
307
307
|
|
|
308
|
-
def
|
|
308
|
+
def test_traverse_dumper_exception
|
|
309
309
|
require 'rbbt/sources/organism'
|
|
310
310
|
|
|
311
311
|
head = 2_000
|
|
@@ -346,7 +346,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
346
346
|
end
|
|
347
347
|
end
|
|
348
348
|
|
|
349
|
-
def
|
|
349
|
+
def test_traverse_into_stream
|
|
350
350
|
size = 100
|
|
351
351
|
array = (1..size).to_a.collect{|n| n.to_s}
|
|
352
352
|
stream = TSV.traverse array, :into => :stream do |e|
|
|
@@ -355,7 +355,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
355
355
|
assert_equal size, stream.read.split("\n").length
|
|
356
356
|
end
|
|
357
357
|
|
|
358
|
-
def
|
|
358
|
+
def test_traverse_progress
|
|
359
359
|
size = 1000
|
|
360
360
|
array = (1..size).to_a.collect{|n| n.to_s}
|
|
361
361
|
stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
|
|
@@ -389,12 +389,12 @@ class TestTSVParallelThrough < Test::Unit::TestCase
|
|
|
389
389
|
assert_equal size, stream.read.split("\n").length
|
|
390
390
|
end
|
|
391
391
|
|
|
392
|
-
def
|
|
392
|
+
def test_store_multiple
|
|
393
393
|
size = 1000
|
|
394
394
|
array = (1..size).to_a.collect{|n| n.to_s}
|
|
395
395
|
stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
|
|
396
396
|
sleep 0.01
|
|
397
|
-
[e,e+".alt"].extend
|
|
397
|
+
[e,e+".alt"].extend MultipleResult
|
|
398
398
|
end
|
|
399
399
|
assert_equal size*2, stream.read.split("\n").length
|
|
400
400
|
end
|
|
@@ -11,9 +11,8 @@ class TestConcurrency < Test::Unit::TestCase
|
|
|
11
11
|
obj3 = "some string"
|
|
12
12
|
obj4 = TSV.setup({1 => 1})
|
|
13
13
|
|
|
14
|
-
socket = RbbtProcessQueue::RbbtProcessSocket.new
|
|
14
|
+
socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
|
|
15
15
|
10.times do
|
|
16
|
-
|
|
17
16
|
socket.push(obj1)
|
|
18
17
|
socket.push(obj2)
|
|
19
18
|
socket.push(obj3)
|
|
@@ -24,7 +23,6 @@ class TestConcurrency < Test::Unit::TestCase
|
|
|
24
23
|
assert_equal obj3, socket.pop
|
|
25
24
|
assert_equal obj4, socket.pop
|
|
26
25
|
|
|
27
|
-
|
|
28
26
|
end
|
|
29
27
|
|
|
30
28
|
socket.swrite.close
|
|
@@ -36,4 +34,37 @@ class TestConcurrency < Test::Unit::TestCase
|
|
|
36
34
|
end
|
|
37
35
|
end
|
|
38
36
|
|
|
37
|
+
if false and __FILE__ == $0
|
|
38
|
+
socket = RbbtProcessQueue::RbbtProcessSocket.new
|
|
39
|
+
|
|
40
|
+
obj = "Some string" * 1000
|
|
41
|
+
Misc.benchmark(1000) do
|
|
42
|
+
socket.push(obj)
|
|
43
|
+
socket.pop
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
obj = ["Some string"] * 1000
|
|
47
|
+
Misc.benchmark(1000) do
|
|
48
|
+
socket.push(obj)
|
|
49
|
+
socket.pop
|
|
50
|
+
end
|
|
51
|
+
socket.clean
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
socket = RbbtProcessQueue::RbbtProcessSocket.new Marshal
|
|
55
|
+
|
|
56
|
+
obj = "Some string" * 1000
|
|
57
|
+
Misc.benchmark(1000) do
|
|
58
|
+
socket.push(obj)
|
|
59
|
+
socket.pop
|
|
60
|
+
end
|
|
61
|
+
socket.clean
|
|
39
62
|
|
|
63
|
+
socket = RbbtProcessQueue::RbbtProcessSocket.new TSV::StringArraySerializer
|
|
64
|
+
obj = ["Some string"] * 1000
|
|
65
|
+
Misc.benchmark(1000) do
|
|
66
|
+
socket.push(obj)
|
|
67
|
+
socket.pop
|
|
68
|
+
end
|
|
69
|
+
socket.clean
|
|
70
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-util
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.13.
|
|
4
|
+
version: 5.13.37
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-06-
|
|
11
|
+
date: 2014-06-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -131,10 +131,12 @@ files:
|
|
|
131
131
|
- lib/rbbt/fix_width_table.rb
|
|
132
132
|
- lib/rbbt/knowledge_base.rb
|
|
133
133
|
- lib/rbbt/monitor.rb
|
|
134
|
+
- lib/rbbt/packed_index.rb
|
|
134
135
|
- lib/rbbt/persist.rb
|
|
135
136
|
- lib/rbbt/persist/tsv.rb
|
|
136
137
|
- lib/rbbt/persist/tsv/adapter.rb
|
|
137
138
|
- lib/rbbt/persist/tsv/cdb.rb
|
|
139
|
+
- lib/rbbt/persist/tsv/fix_width_table.rb
|
|
138
140
|
- lib/rbbt/persist/tsv/kyotocabinet.rb
|
|
139
141
|
- lib/rbbt/persist/tsv/leveldb.rb
|
|
140
142
|
- lib/rbbt/persist/tsv/lmdb.rb
|
|
@@ -294,6 +296,7 @@ files:
|
|
|
294
296
|
- test/rbbt/test_fix_width_table.rb
|
|
295
297
|
- test/rbbt/test_knowledge_base.rb
|
|
296
298
|
- test/rbbt/test_monitor.rb
|
|
299
|
+
- test/rbbt/test_packed_index.rb
|
|
297
300
|
- test/rbbt/test_persist.rb
|
|
298
301
|
- test/rbbt/test_resource.rb
|
|
299
302
|
- test/rbbt/test_tsv.rb
|
|
@@ -390,6 +393,7 @@ test_files:
|
|
|
390
393
|
- test/rbbt/util/test_misc.rb
|
|
391
394
|
- test/rbbt/util/test_tmpfile.rb
|
|
392
395
|
- test/rbbt/util/R/test_eval.rb
|
|
396
|
+
- test/rbbt/test_packed_index.rb
|
|
393
397
|
- test/rbbt/test_association.rb
|
|
394
398
|
- test/rbbt/test_resource.rb
|
|
395
399
|
- test/rbbt/test_entity.rb
|