rbbt-util 5.7.0 → 5.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/annotations.rb +4 -1
- data/lib/rbbt/annotations/util.rb +11 -0
- data/lib/rbbt/persist.rb +8 -2
- data/lib/rbbt/resource/path.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -15
- data/lib/rbbt/tsv/parallel.rb +89 -32
- data/lib/rbbt/tsv/util.rb +11 -0
- data/lib/rbbt/util/R.rb +0 -1
- data/lib/rbbt/util/concurrency.rb +2 -0
- data/lib/rbbt/util/concurrency/processes.rb +96 -0
- data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
- data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
- data/lib/rbbt/util/concurrency/threads.rb +76 -0
- data/lib/rbbt/util/log.rb +37 -5
- data/lib/rbbt/util/misc.rb +89 -4
- data/lib/rbbt/util/semaphore.rb +10 -4
- data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
- data/lib/rbbt/util/simpleopt/doc.rb +2 -4
- data/lib/rbbt/workflow/accessor.rb +39 -12
- data/lib/rbbt/workflow/step.rb +5 -7
- data/share/rbbt_commands/benchmark/pthrough +18 -0
- data/share/rbbt_commands/color +41 -0
- data/share/rbbt_commands/stat/density +50 -0
- data/share/rbbt_commands/tsv/info +21 -3
- data/share/rbbt_commands/tsv/slice +46 -0
- data/share/rbbt_commands/tsv/subset +53 -0
- data/share/rbbt_commands/tsv/values +7 -1
- data/test/rbbt/annotations/test_util.rb +14 -0
- data/test/rbbt/tsv/test_parallel.rb +25 -3
- data/test/rbbt/tsv/test_util.rb +15 -0
- data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
- data/test/rbbt/util/concurrency/test_processes.rb +53 -0
- data/test/rbbt/util/concurrency/test_threads.rb +42 -0
- data/test/rbbt/util/test_concurrency.rb +6 -0
- metadata +23 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2929287d81291dd772ab9ee6f415b52bafad0db
|
4
|
+
data.tar.gz: 61ad4ae0dade13e2cbc0e02c39107243d92a52e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14285e88be1d35d8c23fb77ad038d2818ae1df04245bd2fd48bbab4f72b419eff1042764b8173b33c1d96955dee0df7ecfd27a84efb34171abb10bae05169b79
|
7
|
+
data.tar.gz: d96daf9acd2d79ab8b9425d849fd9442b7fd4ee1d614d364d135b9a601d0b7de1278eba414cb464f92f325cb84ab131311dff1937c1bdc2a99d631e7ea5f146b
|
data/lib/rbbt/annotations.rb
CHANGED
@@ -159,9 +159,12 @@ module Annotated
|
|
159
159
|
object.clean_annotations :
|
160
160
|
object.inject([]){|acc,e| acc << Annotated.purge(e); acc}
|
161
161
|
when Hash
|
162
|
+
new = {}
|
162
163
|
object.each do |key, value|
|
163
|
-
|
164
|
+
Annotated.purge key
|
165
|
+
new[key] = Annotated.purge value
|
164
166
|
end
|
167
|
+
new
|
165
168
|
else
|
166
169
|
object
|
167
170
|
end
|
data/lib/rbbt/persist.rb
CHANGED
@@ -276,10 +276,16 @@ module Persist
|
|
276
276
|
Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
|
277
277
|
res = yield
|
278
278
|
|
279
|
-
|
280
|
-
|
279
|
+
if res.nil?
|
280
|
+
res = load_file(path) unless persist_options[:no_load]
|
281
|
+
else
|
282
|
+
Misc.lock(path) do
|
283
|
+
save_file(path, type, res)
|
284
|
+
end
|
281
285
|
end
|
282
286
|
|
287
|
+
return path if persist_options[:no_load]
|
288
|
+
|
283
289
|
res
|
284
290
|
end
|
285
291
|
rescue
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -7,8 +7,12 @@ module TSV
|
|
7
7
|
|
8
8
|
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
9
9
|
|
10
|
+
def info
|
11
|
+
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
|
12
|
+
end
|
13
|
+
|
10
14
|
def annotate(tsv)
|
11
|
-
TSV.setup(tsv,
|
15
|
+
TSV.setup(tsv, info)
|
12
16
|
end
|
13
17
|
|
14
18
|
def entity_options
|
@@ -149,14 +153,16 @@ module TSV
|
|
149
153
|
def serializer=(serializer)
|
150
154
|
@serializer = serializer
|
151
155
|
self.send(:[]=, KEY_PREFIX + 'serializer', (serializer.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(serializer)), :entry_key)
|
152
|
-
@serializar_module = serializer.nil? ?
|
156
|
+
@serializar_module = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
153
157
|
end
|
154
158
|
|
155
159
|
|
156
160
|
def serializer_module
|
157
|
-
@
|
161
|
+
@serializer_module ||= begin
|
158
162
|
serializer = self.serializer
|
159
|
-
serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
163
|
+
mod = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
|
164
|
+
raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
|
165
|
+
mod
|
160
166
|
end
|
161
167
|
end
|
162
168
|
|
@@ -170,8 +176,10 @@ module TSV
|
|
170
176
|
def [](key, clean = false)
|
171
177
|
value = super(key)
|
172
178
|
return value if clean or value.nil?
|
179
|
+
@serializer_module ||= self.serializer_module
|
180
|
+
|
181
|
+
value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
|
173
182
|
|
174
|
-
value = serializer_module.load(value) if serializer_module and not TSV::CleanSerializer === serializer_module
|
175
183
|
return value if @unnamed or fields.nil?
|
176
184
|
|
177
185
|
case type
|
@@ -186,11 +194,8 @@ module TSV
|
|
186
194
|
end
|
187
195
|
|
188
196
|
def []=(key, value, clean = false)
|
189
|
-
if clean or
|
190
|
-
|
191
|
-
else
|
192
|
-
return super(key, serializer_module.dump(value))
|
193
|
-
end
|
197
|
+
return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
|
198
|
+
super(key, @serializer_module.dump(value))
|
194
199
|
end
|
195
200
|
|
196
201
|
def zip_new(key, values)
|
@@ -231,20 +236,19 @@ module TSV
|
|
231
236
|
def each
|
232
237
|
fields = self.fields
|
233
238
|
|
234
|
-
serializer = self.serializer
|
235
239
|
serializer_module = self.serializer_module
|
236
240
|
super do |key, value|
|
237
241
|
next if ENTRY_KEYS.include? key
|
238
242
|
|
239
243
|
# TODO Update this to be more efficient
|
240
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer
|
244
|
+
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
241
245
|
|
242
246
|
# Annotated with Entity and NamedArray
|
243
247
|
if not @unnamed
|
244
248
|
if not fields.nil?
|
245
249
|
case type
|
246
250
|
when :double, :list
|
247
|
-
setup_array value, fields, key, entity_options, entity_templates if Array
|
251
|
+
setup_array value, fields, key, entity_options, entity_templates if Array == value
|
248
252
|
when :flat, :single
|
249
253
|
prepare_entity(value, fields.first, entity_options)
|
250
254
|
end
|
@@ -258,13 +262,12 @@ module TSV
|
|
258
262
|
end
|
259
263
|
|
260
264
|
def collect
|
261
|
-
serializer = self.serializer
|
262
265
|
serializer_module = self.serializer_module
|
263
266
|
super do |key, value|
|
264
267
|
next if ENTRY_KEYS.include? key
|
265
268
|
|
266
269
|
# TODO Update this to be more efficient
|
267
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer
|
270
|
+
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
268
271
|
|
269
272
|
# Annotated with Entity and NamedArray
|
270
273
|
if not @unnamed
|
data/lib/rbbt/tsv/parallel.rb
CHANGED
@@ -1,47 +1,104 @@
|
|
1
|
+
require 'rbbt/util/concurrency'
|
2
|
+
|
1
3
|
module TSV
|
2
4
|
|
3
|
-
def pthrough(num_threads =
|
4
|
-
q =
|
5
|
-
mutex = Mutex.new
|
6
|
-
|
7
|
-
threads = []
|
8
|
-
|
9
|
-
done = false
|
10
|
-
num_threads.times do |i|
|
11
|
-
threads << Thread.new(Thread.current) do |current|
|
12
|
-
begin
|
13
|
-
loop do
|
14
|
-
p = q.pop
|
15
|
-
p << mutex
|
16
|
-
yield *p
|
17
|
-
next if q.length == 0 and done
|
18
|
-
end
|
19
|
-
rescue Exception
|
20
|
-
current.raise $!
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
5
|
+
def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
6
|
+
q = RbbtThreadQueue.new num_threads
|
24
7
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
q
|
8
|
+
q.init(true, &block)
|
9
|
+
|
10
|
+
begin
|
11
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |*p|
|
12
|
+
q.process p
|
30
13
|
end
|
31
|
-
q
|
14
|
+
q.join
|
15
|
+
ensure
|
16
|
+
q.clean
|
32
17
|
end
|
33
18
|
|
34
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def ppthrough_callback(&block)
|
22
|
+
@ppthrough_callback = block
|
23
|
+
end
|
35
24
|
|
36
|
-
|
25
|
+
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
37
26
|
|
27
|
+
q = RbbtProcessQueue.new num_procs
|
38
28
|
|
39
|
-
|
29
|
+
q.callback &@ppthrough_callback
|
30
|
+
@ppthrough_callback = nil
|
31
|
+
|
32
|
+
q.init do |k,v|
|
33
|
+
block.call k,v
|
34
|
+
end
|
35
|
+
|
36
|
+
begin
|
37
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |*p|
|
38
|
+
q.process q
|
39
|
+
end
|
40
|
+
q.join
|
41
|
+
ensure
|
42
|
+
q.clean
|
43
|
+
end
|
40
44
|
|
41
45
|
res
|
42
46
|
end
|
43
47
|
|
44
|
-
def
|
45
|
-
|
48
|
+
def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
|
49
|
+
|
50
|
+
q = RbbtProcessQueue.new num_procs
|
51
|
+
|
52
|
+
q.callback &@ppthrough_callback
|
53
|
+
@ppthrough_callback = nil
|
54
|
+
|
55
|
+
_pat_size = 20
|
56
|
+
_pat = "A" << _pat_size.to_s
|
57
|
+
|
58
|
+
num_fields = fields.length
|
59
|
+
pattern = case type
|
60
|
+
when :single, :flat
|
61
|
+
_pat * 2
|
62
|
+
when :list, :double
|
63
|
+
_pat * (num_fields + 1)
|
64
|
+
end
|
65
|
+
|
66
|
+
q.init do |str|
|
67
|
+
_parts = str.unpack(pattern)
|
68
|
+
|
69
|
+
case type
|
70
|
+
when :single
|
71
|
+
k, v = _parts
|
72
|
+
when :list
|
73
|
+
k, *v = _parts
|
74
|
+
when :flat
|
75
|
+
k, v = _parts
|
76
|
+
v = v.split "|"
|
77
|
+
when :double
|
78
|
+
k, *v = _parts
|
79
|
+
v = v.collect{|l| l.split "|" }
|
80
|
+
end
|
81
|
+
|
82
|
+
block.call k,v
|
83
|
+
end
|
84
|
+
|
85
|
+
begin
|
86
|
+
res = through(new_key_field, new_fields, uniq, zipped) do |k,v|
|
87
|
+
case type
|
88
|
+
when :flat
|
89
|
+
v = v * "|"
|
90
|
+
when :double
|
91
|
+
v = v.collect{|l| l * "|" } if type == :double
|
92
|
+
end
|
93
|
+
|
94
|
+
str = [k,v].flatten.pack(pattern)
|
95
|
+
q.process str
|
96
|
+
end
|
97
|
+
q.join
|
98
|
+
ensure
|
99
|
+
q.clean
|
100
|
+
end
|
101
|
+
|
102
|
+
res
|
46
103
|
end
|
47
104
|
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -53,7 +53,6 @@ source('#{UTIL}');
|
|
53
53
|
when nil
|
54
54
|
"NULL"
|
55
55
|
when TSV
|
56
|
-
#"as.matrix(data.frame(c(#{object.transpose("Field").collect{|k,v| "#{k}=" << R.ruby2R(v)}.flatten * ", "}), row.names=#{R.ruby2R object.keys}))"
|
57
56
|
"matrix(#{R.ruby2R object.values},dimnames=list(#{R.ruby2R object.keys}, #{R.ruby2R object.fields}))"
|
58
57
|
when Symbol
|
59
58
|
"#{ object }"
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'rbbt/util/concurrency/processes/worker'
|
2
|
+
require 'rbbt/util/concurrency/processes/socket'
|
3
|
+
|
4
|
+
|
5
|
+
class RbbtProcessQueue
|
6
|
+
#{{{ RbbtProcessQueue
|
7
|
+
|
8
|
+
attr_accessor :num_processes, :processes, :queue, :process_monitor
|
9
|
+
def initialize(num_processes)
|
10
|
+
@num_processes = num_processes
|
11
|
+
@processes = []
|
12
|
+
@queue = RbbtProcessSocket.new
|
13
|
+
end
|
14
|
+
|
15
|
+
attr_accessor :callback, :callback_queue, :callback_thread
|
16
|
+
def callback(&block)
|
17
|
+
if block_given?
|
18
|
+
@callback = block
|
19
|
+
|
20
|
+
@callback_queue = RbbtProcessSocket.new
|
21
|
+
|
22
|
+
@callback_thread = Thread.new(Thread.current) do |parent|
|
23
|
+
begin
|
24
|
+
loop do
|
25
|
+
p = @callback_queue.pop
|
26
|
+
raise p if Exception === p
|
27
|
+
@callback.call p
|
28
|
+
end
|
29
|
+
rescue ClosedStream
|
30
|
+
rescue Exception
|
31
|
+
Log.debug $!
|
32
|
+
parent.raise $!
|
33
|
+
Thread.exit
|
34
|
+
end
|
35
|
+
end
|
36
|
+
else
|
37
|
+
@callback, @callback_queue, @callback_thread = nil, nil, nil
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def init(&block)
|
42
|
+
num_processes.times do |i|
|
43
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, &block)
|
44
|
+
end
|
45
|
+
@queue.sread.close
|
46
|
+
@callback_queue.swrite.close if @callback_queue
|
47
|
+
|
48
|
+
@process_monitor = Thread.new(Thread.current) do |parent|
|
49
|
+
begin
|
50
|
+
while @processes.any? do
|
51
|
+
pid = Process.wait -1, Process::WNOHANG
|
52
|
+
if pid
|
53
|
+
@processes.delete_if{|p| p.pid == pid}
|
54
|
+
raise "Process #{pid} failed" unless $?.success?
|
55
|
+
else
|
56
|
+
sleep 1
|
57
|
+
end
|
58
|
+
end
|
59
|
+
rescue
|
60
|
+
parent.raise $!
|
61
|
+
ensure
|
62
|
+
Thread.exit
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def close_callback
|
68
|
+
@callback_thread.join if @callback_thread and @callback_thread.alive?
|
69
|
+
end
|
70
|
+
|
71
|
+
def join
|
72
|
+
@queue.push ClosedStream.new
|
73
|
+
@queue.swrite.close
|
74
|
+
begin
|
75
|
+
@process_monitor.join
|
76
|
+
ensure
|
77
|
+
close_callback if @callback
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def clean
|
82
|
+
@processes.each{|p| p.abort }.clear
|
83
|
+
@callback_thread.raise Aborted if @callback_thread and @callback_thread.alive?
|
84
|
+
end
|
85
|
+
|
86
|
+
def process(e)
|
87
|
+
@queue.push e
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.each(list, num = 3, &block)
|
91
|
+
q = RbbtProcessQueue.new num
|
92
|
+
q.init(&block)
|
93
|
+
list.each do |elem| q.process elem end
|
94
|
+
q.join
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'rbbt/util/semaphore'
|
2
|
+
|
3
|
+
class RbbtProcessQueue
|
4
|
+
class RbbtProcessSocket
|
5
|
+
|
6
|
+
Serializer = Marshal
|
7
|
+
|
8
|
+
attr_accessor :sread, :swrite, :write_sem, :read_sem
|
9
|
+
def initialize
|
10
|
+
@sread, @swrite = IO.pipe
|
11
|
+
|
12
|
+
key = rand(100000).to_s;
|
13
|
+
@write_sem = key + '.in'
|
14
|
+
@read_sem = key + '.out'
|
15
|
+
RbbtSemaphore.create_semaphore(@write_sem,1)
|
16
|
+
RbbtSemaphore.create_semaphore(@read_sem,1)
|
17
|
+
end
|
18
|
+
|
19
|
+
def clean
|
20
|
+
@sread.close unless @sread.closed?
|
21
|
+
@swrite.close unless @swrite.closed?
|
22
|
+
RbbtSemaphore.delete_semaphore(@write_sem)
|
23
|
+
RbbtSemaphore.delete_semaphore(@read_sem)
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def dump(obj, stream)
|
28
|
+
case obj
|
29
|
+
when String
|
30
|
+
payload = obj
|
31
|
+
size_head = [payload.bytesize,"S"].pack 'La'
|
32
|
+
str = size_head << payload
|
33
|
+
else
|
34
|
+
payload = Serializer.dump(obj)
|
35
|
+
size_head = [payload.bytesize,"M"].pack 'La'
|
36
|
+
str = size_head << payload
|
37
|
+
end
|
38
|
+
|
39
|
+
write_length = str.length
|
40
|
+
IO.select(nil, [stream])
|
41
|
+
wrote = stream.write(str)
|
42
|
+
while wrote < write_length
|
43
|
+
wrote += stream.write(str[wrote..-1])
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def load(stream)
|
48
|
+
size_head = Misc.read_stream stream, 5
|
49
|
+
|
50
|
+
size, type = size_head.unpack('La')
|
51
|
+
|
52
|
+
begin
|
53
|
+
payload = Misc.read_stream stream, size
|
54
|
+
case type
|
55
|
+
when "M"
|
56
|
+
Serializer.load(payload)
|
57
|
+
when "S"
|
58
|
+
payload
|
59
|
+
end
|
60
|
+
rescue TryAgain
|
61
|
+
retry
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
#{{{ ACCESSOR
|
66
|
+
|
67
|
+
def push(obj)
|
68
|
+
begin
|
69
|
+
RbbtSemaphore.synchronize(@write_sem) do
|
70
|
+
self.dump(obj, @swrite)
|
71
|
+
end
|
72
|
+
rescue
|
73
|
+
return ClosedStream.new
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def pop
|
78
|
+
begin
|
79
|
+
RbbtSemaphore.synchronize(@read_sem) do
|
80
|
+
self.load(@sread)
|
81
|
+
end
|
82
|
+
rescue IOError, ClosedStream
|
83
|
+
return ClosedStream.new
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|