rbbt-util 5.7.0 → 5.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/annotations.rb +4 -1
- data/lib/rbbt/annotations/util.rb +11 -0
- data/lib/rbbt/persist.rb +8 -2
- data/lib/rbbt/resource/path.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -15
- data/lib/rbbt/tsv/parallel.rb +89 -32
- data/lib/rbbt/tsv/util.rb +11 -0
- data/lib/rbbt/util/R.rb +0 -1
- data/lib/rbbt/util/concurrency.rb +2 -0
- data/lib/rbbt/util/concurrency/processes.rb +96 -0
- data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
- data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
- data/lib/rbbt/util/concurrency/threads.rb +76 -0
- data/lib/rbbt/util/log.rb +37 -5
- data/lib/rbbt/util/misc.rb +89 -4
- data/lib/rbbt/util/semaphore.rb +10 -4
- data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
- data/lib/rbbt/util/simpleopt/doc.rb +2 -4
- data/lib/rbbt/workflow/accessor.rb +39 -12
- data/lib/rbbt/workflow/step.rb +5 -7
- data/share/rbbt_commands/benchmark/pthrough +18 -0
- data/share/rbbt_commands/color +41 -0
- data/share/rbbt_commands/stat/density +50 -0
- data/share/rbbt_commands/tsv/info +21 -3
- data/share/rbbt_commands/tsv/slice +46 -0
- data/share/rbbt_commands/tsv/subset +53 -0
- data/share/rbbt_commands/tsv/values +7 -1
- data/test/rbbt/annotations/test_util.rb +14 -0
- data/test/rbbt/tsv/test_parallel.rb +25 -3
- data/test/rbbt/tsv/test_util.rb +15 -0
- data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
- data/test/rbbt/util/concurrency/test_processes.rb +53 -0
- data/test/rbbt/util/concurrency/test_threads.rb +42 -0
- data/test/rbbt/util/test_concurrency.rb +6 -0
- metadata +23 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2929287d81291dd772ab9ee6f415b52bafad0db
|
4
|
+
data.tar.gz: 61ad4ae0dade13e2cbc0e02c39107243d92a52e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14285e88be1d35d8c23fb77ad038d2818ae1df04245bd2fd48bbab4f72b419eff1042764b8173b33c1d96955dee0df7ecfd27a84efb34171abb10bae05169b79
|
7
|
+
data.tar.gz: d96daf9acd2d79ab8b9425d849fd9442b7fd4ee1d614d364d135b9a601d0b7de1278eba414cb464f92f325cb84ab131311dff1937c1bdc2a99d631e7ea5f146b
|
data/lib/rbbt/annotations.rb
CHANGED
@@ -159,9 +159,12 @@ module Annotated
|
|
159
159
|
object.clean_annotations :
|
160
160
|
object.inject([]){|acc,e| acc << Annotated.purge(e); acc}
|
161
161
|
when Hash
|
162
|
+
new = {}
|
162
163
|
object.each do |key, value|
|
163
|
-
|
164
|
+
Annotated.purge key
|
165
|
+
new[key] = Annotated.purge value
|
164
166
|
end
|
167
|
+
new
|
165
168
|
else
|
166
169
|
object
|
167
170
|
end
|
data/lib/rbbt/persist.rb
CHANGED
@@ -276,10 +276,16 @@ module Persist
|
|
276
276
|
Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
|
277
277
|
res = yield
|
278
278
|
|
279
|
-
|
280
|
-
|
279
|
+
if res.nil?
|
280
|
+
res = load_file(path) unless persist_options[:no_load]
|
281
|
+
else
|
282
|
+
Misc.lock(path) do
|
283
|
+
save_file(path, type, res)
|
284
|
+
end
|
281
285
|
end
|
282
286
|
|
287
|
+
return path if persist_options[:no_load]
|
288
|
+
|
283
289
|
res
|
284
290
|
end
|
285
291
|
rescue
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -7,8 +7,12 @@ module TSV
|
|
7
7
|
|
8
8
|
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
9
9
|
|
10
|
+
def info
|
11
|
+
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
|
12
|
+
end
|
13
|
+
|
10
14
|
def annotate(tsv)
|
11
|
-
TSV.setup(tsv,
|
15
|
+
TSV.setup(tsv, info)
|
12
16
|
end
|
13
17
|
|
14
18
|
def entity_options
|
@@ -149,14 +153,16 @@ module TSV
|
|
149
153
|
def serializer=(serializer)
|
150
154
|
@serializer = serializer
|
151
155
|
self.send(:[]=, KEY_PREFIX + 'serializer', (serializer.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(serializer)), :entry_key)
|
152
|
-
@serializar_module = serializer.nil? ?
|
156
|
+
@serializar_module = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
153
157
|
end
|
154
158
|
|
155
159
|
|
156
160
|
def serializer_module
|
157
|
-
@
|
161
|
+
@serializer_module ||= begin
|
158
162
|
serializer = self.serializer
|
159
|
-
serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
163
|
+
mod = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
164
|
+
raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
|
165
|
+
mod
|
160
166
|
end
|
161
167
|
end
|
162
168
|
|
@@ -170,8 +176,10 @@ module TSV
|
|
170
176
|
def [](key, clean = false)
|
171
177
|
value = super(key)
|
172
178
|
return value if clean or value.nil?
|
179
|
+
@serializer_module ||= self.serializer_module
|
180
|
+
|
181
|
+
value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
|
173
182
|
|
174
|
-
value = serializer_module.load(value) if serializer_module and not TSV::CleanSerializer === serializer_module
|
175
183
|
return value if @unnamed or fields.nil?
|
176
184
|
|
177
185
|
case type
|
@@ -186,11 +194,8 @@ module TSV
|
|
186
194
|
end
|
187
195
|
|
188
196
|
def []=(key, value, clean = false)
|
189
|
-
if clean or
|
190
|
-
|
191
|
-
else
|
192
|
-
return super(key, serializer_module.dump(value))
|
193
|
-
end
|
197
|
+
return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
|
198
|
+
super(key, @serializer_module.dump(value))
|
194
199
|
end
|
195
200
|
|
196
201
|
def zip_new(key, values)
|
@@ -231,20 +236,19 @@ module TSV
|
|
231
236
|
def each
|
232
237
|
fields = self.fields
|
233
238
|
|
234
|
-
serializer = self.serializer
|
235
239
|
serializer_module = self.serializer_module
|
236
240
|
super do |key, value|
|
237
241
|
next if ENTRY_KEYS.include? key
|
238
242
|
|
239
243
|
# TODO Update this to be more efficient
|
240
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer
|
244
|
+
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
241
245
|
|
242
246
|
# Annotated with Entity and NamedArray
|
243
247
|
if not @unnamed
|
244
248
|
if not fields.nil?
|
245
249
|
case type
|
246
250
|
when :double, :list
|
247
|
-
setup_array value, fields, key, entity_options, entity_templates if Array
|
251
|
+
setup_array value, fields, key, entity_options, entity_templates if Array == value
|
248
252
|
when :flat, :single
|
249
253
|
prepare_entity(value, fields.first, entity_options)
|
250
254
|
end
|
@@ -258,13 +262,12 @@ module TSV
|
|
258
262
|
end
|
259
263
|
|
260
264
|
def collect
|
261
|
-
serializer = self.serializer
|
262
265
|
serializer_module = self.serializer_module
|
263
266
|
super do |key, value|
|
264
267
|
next if ENTRY_KEYS.include? key
|
265
268
|
|
266
269
|
# TODO Update this to be more efficient
|
267
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer
|
270
|
+
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
268
271
|
|
269
272
|
# Annotated with Entity and NamedArray
|
270
273
|
if not @unnamed
|
data/lib/rbbt/tsv/parallel.rb
CHANGED
@@ -1,47 +1,104 @@
|
|
1
|
+
require 'rbbt/util/concurrency'
|
2
|
+
|
1
3
|
module TSV
|
2
4
|
|
3
|
-
def pthrough(num_threads =
|
4
|
-
q =
|
5
|
-
mutex = Mutex.new
|
6
|
-
|
7
|
-
threads = []
|
8
|
-
|
9
|
-
done = false
|
10
|
-
num_threads.times do |i|
|
11
|
-
threads << Thread.new(Thread.current) do |current|
|
12
|
-
begin
|
13
|
-
loop do
|
14
|
-
p = q.pop
|
15
|
-
p << mutex
|
16
|
-
yield *p
|
17
|
-
next if q.length == 0 and done
|
18
|
-
end
|
19
|
-
rescue Exception
|
20
|
-
current.raise $!
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
5
|
+
def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
6
|
+
q = RbbtThreadQueue.new num_threads
|
24
7
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
q
|
8
|
+
q.init(true, &block)
|
9
|
+
|
10
|
+
begin
|
11
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |*p|
|
12
|
+
q.process p
|
30
13
|
end
|
31
|
-
q
|
14
|
+
q.join
|
15
|
+
ensure
|
16
|
+
q.clean
|
32
17
|
end
|
33
18
|
|
34
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def ppthrough_callback(&block)
|
22
|
+
@ppthrough_callback = block
|
23
|
+
end
|
35
24
|
|
36
|
-
|
25
|
+
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
37
26
|
|
27
|
+
q = RbbtProcessQueue.new num_procs
|
38
28
|
|
39
|
-
|
29
|
+
q.callback &@ppthrough_callback
|
30
|
+
@ppthrough_callback = nil
|
31
|
+
|
32
|
+
q.init do |k,v|
|
33
|
+
block.call k,v
|
34
|
+
end
|
35
|
+
|
36
|
+
begin
|
37
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |*p|
|
38
|
+
q.process q
|
39
|
+
end
|
40
|
+
q.join
|
41
|
+
ensure
|
42
|
+
q.clean
|
43
|
+
end
|
40
44
|
|
41
45
|
res
|
42
46
|
end
|
43
47
|
|
44
|
-
def
|
45
|
-
|
48
|
+
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
49
|
+
|
50
|
+
q = RbbtProcessQueue.new num_procs
|
51
|
+
|
52
|
+
q.callback &@ppthrough_callback
|
53
|
+
@ppthrough_callback = nil
|
54
|
+
|
55
|
+
_pat_size = 20
|
56
|
+
_pat = "A" << _pat_size.to_s
|
57
|
+
|
58
|
+
num_fields = fields.length
|
59
|
+
pattern = case type
|
60
|
+
when :single, :flat
|
61
|
+
_pat * 2
|
62
|
+
when :list, :double
|
63
|
+
_pat * (num_fields + 1)
|
64
|
+
end
|
65
|
+
|
66
|
+
q.init do |str|
|
67
|
+
_parts = str.unpack(pattern)
|
68
|
+
|
69
|
+
case type
|
70
|
+
when :single
|
71
|
+
k, v = _parts
|
72
|
+
when :list
|
73
|
+
k, *v = _parts
|
74
|
+
when :flat
|
75
|
+
k, v = _parts
|
76
|
+
v = v.split "|"
|
77
|
+
when :double
|
78
|
+
k, *v = _parts
|
79
|
+
v = v.collect{|l| l.split "|" }
|
80
|
+
end
|
81
|
+
|
82
|
+
block.call k,v
|
83
|
+
end
|
84
|
+
|
85
|
+
begin
|
86
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |k,v|
|
87
|
+
case type
|
88
|
+
when :flat
|
89
|
+
v = v * "|"
|
90
|
+
when :double
|
91
|
+
v = v.collect{|l| l * "|" } if type == :double
|
92
|
+
end
|
93
|
+
|
94
|
+
str = [k,v].flatten.pack(pattern)
|
95
|
+
q.process str
|
96
|
+
end
|
97
|
+
q.join
|
98
|
+
ensure
|
99
|
+
q.clean
|
100
|
+
end
|
101
|
+
|
102
|
+
res
|
46
103
|
end
|
47
104
|
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -53,7 +53,6 @@ source('#{UTIL}');
|
|
53
53
|
when nil
|
54
54
|
"NULL"
|
55
55
|
when TSV
|
56
|
-
#"as.matrix(data.frame(c(#{object.transpose("Field").collect{|k,v| "#{k}=" << R.ruby2R(v)}.flatten * ", "}), row.names=#{R.ruby2R object.keys}))"
|
57
56
|
"matrix(#{R.ruby2R object.values},dimnames=list(#{R.ruby2R object.keys}, #{R.ruby2R object.fields}))"
|
58
57
|
when Symbol
|
59
58
|
"#{ object }"
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'rbbt/util/concurrency/processes/worker'
|
2
|
+
require 'rbbt/util/concurrency/processes/socket'
|
3
|
+
|
4
|
+
|
5
|
+
class RbbtProcessQueue
|
6
|
+
#{{{ RbbtProcessQueue
|
7
|
+
|
8
|
+
attr_accessor :num_processes, :processes, :queue, :process_monitor
|
9
|
+
def initialize(num_processes)
|
10
|
+
@num_processes = num_processes
|
11
|
+
@processes = []
|
12
|
+
@queue = RbbtProcessSocket.new
|
13
|
+
end
|
14
|
+
|
15
|
+
attr_accessor :callback, :callback_queue, :callback_thread
|
16
|
+
def callback(&block)
|
17
|
+
if block_given?
|
18
|
+
@callback = block
|
19
|
+
|
20
|
+
@callback_queue = RbbtProcessSocket.new
|
21
|
+
|
22
|
+
@callback_thread = Thread.new(Thread.current) do |parent|
|
23
|
+
begin
|
24
|
+
loop do
|
25
|
+
p = @callback_queue.pop
|
26
|
+
raise p if Exception === p
|
27
|
+
@callback.call p
|
28
|
+
end
|
29
|
+
rescue ClosedStream
|
30
|
+
rescue Exception
|
31
|
+
Log.debug $!
|
32
|
+
parent.raise $!
|
33
|
+
Thread.exit
|
34
|
+
end
|
35
|
+
end
|
36
|
+
else
|
37
|
+
@callback, @callback_queue, @callback_thread = nil, nil, nil
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def init(&block)
|
42
|
+
num_processes.times do |i|
|
43
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, &block)
|
44
|
+
end
|
45
|
+
@queue.sread.close
|
46
|
+
@callback_queue.swrite.close if @callback_queue
|
47
|
+
|
48
|
+
@process_monitor = Thread.new(Thread.current) do |parent|
|
49
|
+
begin
|
50
|
+
while @processes.any? do
|
51
|
+
pid = Process.wait -1, Process::WNOHANG
|
52
|
+
if pid
|
53
|
+
@processes.delete_if{|p| p.pid == pid}
|
54
|
+
raise "Process #{pid} failed" unless $?.success?
|
55
|
+
else
|
56
|
+
sleep 1
|
57
|
+
end
|
58
|
+
end
|
59
|
+
rescue
|
60
|
+
parent.raise $!
|
61
|
+
ensure
|
62
|
+
Thread.exit
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def close_callback
|
68
|
+
@callback_thread.join if @callback_thread and @callback_thread.alive?
|
69
|
+
end
|
70
|
+
|
71
|
+
def join
|
72
|
+
@queue.push ClosedStream.new
|
73
|
+
@queue.swrite.close
|
74
|
+
begin
|
75
|
+
@process_monitor.join
|
76
|
+
ensure
|
77
|
+
close_callback if @callback
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def clean
|
82
|
+
@processes.each{|p| p.abort }.clear
|
83
|
+
@callback_thread.raise Aborted if @callback_thread and @callback_thread.alive?
|
84
|
+
end
|
85
|
+
|
86
|
+
def process(e)
|
87
|
+
@queue.push e
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.each(list, num = 3, &block)
|
91
|
+
q = RbbtProcessQueue.new num
|
92
|
+
q.init(&block)
|
93
|
+
list.each do |elem| q.process elem end
|
94
|
+
q.join
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'rbbt/util/semaphore'
|
2
|
+
|
3
|
+
class RbbtProcessQueue
|
4
|
+
class RbbtProcessSocket
|
5
|
+
|
6
|
+
Serializer = Marshal
|
7
|
+
|
8
|
+
attr_accessor :sread, :swrite, :write_sem, :read_sem
|
9
|
+
def initialize
|
10
|
+
@sread, @swrite = IO.pipe
|
11
|
+
|
12
|
+
key = rand(100000).to_s;
|
13
|
+
@write_sem = key + '.in'
|
14
|
+
@read_sem = key + '.out'
|
15
|
+
RbbtSemaphore.create_semaphore(@write_sem,1)
|
16
|
+
RbbtSemaphore.create_semaphore(@read_sem,1)
|
17
|
+
end
|
18
|
+
|
19
|
+
def clean
|
20
|
+
@sread.close unless @sread.closed?
|
21
|
+
@swrite.close unless @swrite.closed?
|
22
|
+
RbbtSemaphore.delete_semaphore(@write_sem)
|
23
|
+
RbbtSemaphore.delete_semaphore(@read_sem)
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def dump(obj, stream)
|
28
|
+
case obj
|
29
|
+
when String
|
30
|
+
payload = obj
|
31
|
+
size_head = [payload.bytesize,"S"].pack 'La'
|
32
|
+
str = size_head << payload
|
33
|
+
else
|
34
|
+
payload = Serializer.dump(obj)
|
35
|
+
size_head = [payload.bytesize,"M"].pack 'La'
|
36
|
+
str = size_head << payload
|
37
|
+
end
|
38
|
+
|
39
|
+
write_length = str.length
|
40
|
+
IO.select(nil, [stream])
|
41
|
+
wrote = stream.write(str)
|
42
|
+
while wrote < write_length
|
43
|
+
wrote += stream.write(str[wrote..-1])
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def load(stream)
|
48
|
+
size_head = Misc.read_stream stream, 5
|
49
|
+
|
50
|
+
size, type = size_head.unpack('La')
|
51
|
+
|
52
|
+
begin
|
53
|
+
payload = Misc.read_stream stream, size
|
54
|
+
case type
|
55
|
+
when "M"
|
56
|
+
Serializer.load(payload)
|
57
|
+
when "S"
|
58
|
+
payload
|
59
|
+
end
|
60
|
+
rescue TryAgain
|
61
|
+
retry
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
#{{{ ACCESSOR
|
66
|
+
|
67
|
+
def push(obj)
|
68
|
+
begin
|
69
|
+
RbbtSemaphore.synchronize(@write_sem) do
|
70
|
+
self.dump(obj, @swrite)
|
71
|
+
end
|
72
|
+
rescue
|
73
|
+
return ClosedStream.new
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def pop
|
78
|
+
begin
|
79
|
+
RbbtSemaphore.synchronize(@read_sem) do
|
80
|
+
self.load(@sread)
|
81
|
+
end
|
82
|
+
rescue IOError, ClosedStream
|
83
|
+
return ClosedStream.new
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|