rbbt-util 5.20.2 → 5.20.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/association/index.rb +3 -4
- data/lib/rbbt/tsv/accessor.rb +1 -1
- data/lib/rbbt/tsv/manipulate.rb +0 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +8 -2
- data/lib/rbbt/util/cmd.rb +1 -1
- data/lib/rbbt/util/concurrency/processes.rb +4 -3
- data/lib/rbbt/util/concurrency/processes/worker.rb +4 -2
- data/lib/rbbt/util/log/progress/util.rb +20 -1
- data/lib/rbbt/util/misc/bgzf.rb +1 -1
- data/lib/rbbt/util/misc/inspect.rb +7 -3
- data/lib/rbbt/util/misc/multipart_payload.rb +8 -8
- data/lib/rbbt/util/misc/pipes.rb +115 -51
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow.rb +1 -0
- data/lib/rbbt/workflow/accessor.rb +86 -15
- data/lib/rbbt/workflow/definition.rb +10 -17
- data/lib/rbbt/workflow/step.rb +4 -16
- data/lib/rbbt/workflow/step/dependencies.rb +451 -0
- data/lib/rbbt/workflow/step/run.rb +43 -226
- data/lib/rbbt/workflow/task.rb +7 -1
- data/share/rbbt_commands/association/subset +6 -1
- data/share/rbbt_commands/workflow/task +21 -9
- data/test/rbbt/util/misc/test_pipes.rb +54 -0
- data/test/rbbt/workflow/step/test_dependencies.rb +206 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c01ec7b654f4f713aaf4c9cf3b518aec087009de
|
4
|
+
data.tar.gz: 2721569f2e2a9445c97415eb5093150a6ca1850a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c152113e19eaea2dac605abdff6e60132137dd1eb09f797befc230d2d4eb6422ce5c9c0defaaab6d6f684e772c0d65776efc0836ffacbf680b9881d1e4a13d41
|
7
|
+
data.tar.gz: 68a251d79736a2c0d54120f50071015c96a51270886f83ea951ca460fc1c0ae48af8f07ad3a90d1ed93a81af0acbfb7d3de7b57907ebaca741610e06e3de4740
|
@@ -59,18 +59,16 @@ module Association
|
|
59
59
|
list.replace [list.first] * size if list.length == 1
|
60
60
|
end if recycle and size > 1
|
61
61
|
|
62
|
-
|
63
62
|
rest = Misc.zip_fields rest
|
64
63
|
|
65
|
-
|
66
64
|
annotations = (Array === rest.first and rest.first.length > 1) ?
|
67
65
|
targets.zip(rest) :
|
68
66
|
targets.zip(rest * targets.length)
|
69
67
|
|
68
|
+
source = source.gsub('~','-..-')
|
70
69
|
annotations.each do |target, info|
|
71
70
|
next if target.nil? or target.empty?
|
72
|
-
|
73
|
-
target.gsub!('~','-..-')
|
71
|
+
target = target.gsub('~','-..-')
|
74
72
|
key = [source, target] * "~"
|
75
73
|
|
76
74
|
if data[key].nil? or info.nil?
|
@@ -160,6 +158,7 @@ module Association
|
|
160
158
|
|
161
159
|
new.undirected = undirected
|
162
160
|
|
161
|
+
Log.tsv new
|
163
162
|
new
|
164
163
|
rescue Exception
|
165
164
|
Log.error "Deleting after error reversing database: #{ reverse_filename }"
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -20,7 +20,13 @@ module TSV
|
|
20
20
|
case obj
|
21
21
|
when (defined? Step and Step)
|
22
22
|
if obj.done?
|
23
|
-
|
23
|
+
path = obj.path
|
24
|
+
path = path.find if path.respond_to? :find
|
25
|
+
if File.exists? path
|
26
|
+
CMD.cmd("wc -l '#{path}'").read.to_i
|
27
|
+
else
|
28
|
+
nil
|
29
|
+
end
|
24
30
|
else
|
25
31
|
nil
|
26
32
|
end
|
@@ -349,7 +355,7 @@ module TSV
|
|
349
355
|
respawn = true if ENV["RBBT_RESPAWN"] and ENV["RBBT_RESPAWN"] == "true"
|
350
356
|
|
351
357
|
Log.low "Traversing in #{ num } cpus: #{respawn ? "respawn" : "no respawn"}"
|
352
|
-
q = RbbtProcessQueue.new num, cleanup, join, respawn
|
358
|
+
q = RbbtProcessQueue.new num, cleanup, join, respawn, !!bar
|
353
359
|
callback = Proc.new{ bar.tick } if callback.nil? and bar
|
354
360
|
q.callback &callback
|
355
361
|
q.init &block
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -3,13 +3,14 @@ require 'rbbt/util/concurrency/processes/socket'
|
|
3
3
|
|
4
4
|
class RbbtProcessQueue
|
5
5
|
|
6
|
-
attr_accessor :num_processes, :processes, :queue, :process_monitor, :cleanup, :join, :reswpan
|
7
|
-
def initialize(num_processes, cleanup = nil, join = nil, reswpan = nil)
|
6
|
+
attr_accessor :num_processes, :processes, :queue, :process_monitor, :cleanup, :join, :reswpan, :offset
|
7
|
+
def initialize(num_processes, cleanup = nil, join = nil, reswpan = nil, offset = false)
|
8
8
|
@num_processes = num_processes
|
9
9
|
@processes = []
|
10
10
|
@cleanup = cleanup
|
11
11
|
@join = join
|
12
12
|
@respawn = reswpan
|
13
|
+
@offset = offset
|
13
14
|
@queue = RbbtProcessSocket.new
|
14
15
|
end
|
15
16
|
|
@@ -59,7 +60,7 @@ class RbbtProcessQueue
|
|
59
60
|
|
60
61
|
def init(&block)
|
61
62
|
num_processes.times do |i|
|
62
|
-
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, &block)
|
63
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &block)
|
63
64
|
end
|
64
65
|
@queue.close_read
|
65
66
|
|
@@ -120,11 +120,12 @@ class RbbtProcessQueue
|
|
120
120
|
end
|
121
121
|
end
|
122
122
|
|
123
|
-
def initialize(queue, callback_queue = nil, cleanup = nil, respawn = false, &block)
|
124
|
-
@queue, @callback_queue, @cleanup, @block = queue, callback_queue, cleanup, block
|
123
|
+
def initialize(queue, callback_queue = nil, cleanup = nil, respawn = false, offset = false, &block)
|
124
|
+
@queue, @callback_queue, @cleanup, @block, @offset = queue, callback_queue, cleanup, block, offset
|
125
125
|
|
126
126
|
@pid = Process.fork do
|
127
127
|
Misc.pre_fork
|
128
|
+
Log::ProgressBar.add_offset if @offset
|
128
129
|
|
129
130
|
@cleanup.call if @cleanup
|
130
131
|
@queue.close_write
|
@@ -141,6 +142,7 @@ class RbbtProcessQueue
|
|
141
142
|
else
|
142
143
|
run
|
143
144
|
end
|
145
|
+
Log::ProgressBar.remove_offset if @offset
|
144
146
|
end
|
145
147
|
end
|
146
148
|
|
@@ -5,11 +5,30 @@ module Log
|
|
5
5
|
REMOVE = []
|
6
6
|
SILENCED = []
|
7
7
|
|
8
|
+
def self.add_offset
|
9
|
+
@@offset = offset + 1
|
10
|
+
@@offset = 0 if @@offset < 0
|
11
|
+
@@offset
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.remove_offset
|
15
|
+
@@offset = offset - 1
|
16
|
+
@@offset = 0 if @@offset < 0
|
17
|
+
@@offset
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def self.offset
|
22
|
+
@@offset ||= 0
|
23
|
+
@@offset = 0 if @@offset < 0
|
24
|
+
@@offset
|
25
|
+
end
|
26
|
+
|
8
27
|
def self.new_bar(max, options = {})
|
9
28
|
cleanup_bars
|
10
29
|
BAR_MUTEX.synchronize do
|
11
30
|
#Log::LAST.replace "new_bar" if Log::LAST == "progress"
|
12
|
-
options = Misc.add_defaults options, :depth => BARS.length
|
31
|
+
options = Misc.add_defaults options, :depth => BARS.length + Log::ProgressBar.offset
|
13
32
|
BARS << (bar = ProgressBar.new(max, options))
|
14
33
|
bar
|
15
34
|
end
|
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
@@ -65,7 +65,7 @@ module Misc
|
|
65
65
|
else
|
66
66
|
new = "{"
|
67
67
|
obj.each do |k,v|
|
68
|
-
new << k
|
68
|
+
new << fingerprint(k) << '=>' << fingerprint(v) << ' '
|
69
69
|
end
|
70
70
|
if new.length > 1
|
71
71
|
new[-1] = "}"
|
@@ -249,8 +249,8 @@ module Misc
|
|
249
249
|
str
|
250
250
|
end
|
251
251
|
|
252
|
-
|
253
|
-
def self.
|
252
|
+
|
253
|
+
def self.obj2digest(obj)
|
254
254
|
str = obj2str(obj)
|
255
255
|
|
256
256
|
if str.empty?
|
@@ -259,4 +259,8 @@ module Misc
|
|
259
259
|
digest(str)
|
260
260
|
end
|
261
261
|
end
|
262
|
+
|
263
|
+
def self.obj2md5(obj)
|
264
|
+
obj2digest(obj)
|
265
|
+
end
|
262
266
|
end
|
@@ -22,7 +22,7 @@ module RbbtMutiplartPayload
|
|
22
22
|
EOL = "\r\n"
|
23
23
|
|
24
24
|
def self.mutex
|
25
|
-
|
25
|
+
@@mutex ||= Mutex.new
|
26
26
|
end
|
27
27
|
|
28
28
|
def self.input_header(name, filename = nil)
|
@@ -47,17 +47,19 @@ module RbbtMutiplartPayload
|
|
47
47
|
|
48
48
|
def self.add_stream(io, name, content, filename = nil)
|
49
49
|
header = input_header(name, filename)
|
50
|
-
io.write "--" + BOUNDARY + EOL + header + EOL
|
50
|
+
io.write "--" + BOUNDARY + EOL + header + EOL
|
51
51
|
|
52
|
-
|
53
|
-
|
52
|
+
begin
|
53
|
+
while c = content.readpartial(Misc::BLOCK_SIZE)
|
54
|
+
io.write c
|
55
|
+
end
|
56
|
+
rescue EOFError
|
54
57
|
end
|
55
58
|
content.close
|
56
59
|
end
|
57
60
|
|
58
61
|
def self.close_stream(io)
|
59
62
|
io.write "--" + BOUNDARY + "--" + EOL + EOL
|
60
|
-
io.write EOL
|
61
63
|
io.close
|
62
64
|
end
|
63
65
|
|
@@ -121,8 +123,7 @@ module RbbtMutiplartPayload
|
|
121
123
|
req.body = sout.read
|
122
124
|
end
|
123
125
|
|
124
|
-
Misc.open_pipe
|
125
|
-
sleep rand(10).to_f / 5
|
126
|
+
Misc.open_pipe do |sin|
|
126
127
|
Net::HTTP.start(uri.hostname, uri.port) do |http|
|
127
128
|
http.request(req) do |res|
|
128
129
|
url_path = res["RBBT-STREAMING-JOB-URL"]
|
@@ -139,7 +140,6 @@ module RbbtMutiplartPayload
|
|
139
140
|
sin.puts "BULK" if report_type
|
140
141
|
sin.write res.body
|
141
142
|
end
|
142
|
-
sin.close
|
143
143
|
end
|
144
144
|
end
|
145
145
|
end
|
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -16,6 +16,8 @@ module Misc
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
BLOCK_SIZE=1024 * 8
|
20
|
+
|
19
21
|
PIPE_MUTEX = Mutex.new
|
20
22
|
|
21
23
|
OPEN_PIPE_IN = []
|
@@ -87,70 +89,136 @@ module Misc
|
|
87
89
|
sout
|
88
90
|
end
|
89
91
|
|
90
|
-
def self.tee_stream_thread(stream)
|
91
|
-
|
92
|
-
|
92
|
+
#def self.tee_stream_thread(stream)
|
93
|
+
# stream_out1, stream_in1 = Misc.pipe
|
94
|
+
# stream_out2, stream_in2 = Misc.pipe
|
95
|
+
|
96
|
+
# splitter_thread = Thread.new(Thread.current) do |parent|
|
97
|
+
# begin
|
98
|
+
|
99
|
+
# skip1 = skip2 = false
|
100
|
+
# while block = stream.read(1024)
|
101
|
+
|
102
|
+
# begin
|
103
|
+
# stream_in1.write block;
|
104
|
+
# rescue IOError
|
105
|
+
# Log.medium("Tee stream 1 #{Misc.fingerprint stream} IOError: #{$!.message}");
|
106
|
+
# skip1 = true
|
107
|
+
# end unless skip1
|
108
|
+
|
109
|
+
# begin
|
110
|
+
# stream_in2.write block
|
111
|
+
# rescue IOError
|
112
|
+
# Log.medium("Tee stream 2 #{Misc.fingerprint stream} IOError: #{$!.message}");
|
113
|
+
# skip2 = true
|
114
|
+
# end unless skip2
|
115
|
+
|
116
|
+
# end
|
117
|
+
|
118
|
+
# stream_in1.close unless stream_in1.closed?
|
119
|
+
# stream.join if stream.respond_to? :join
|
120
|
+
# stream_in2.close unless stream_in2.closed?
|
121
|
+
# rescue Aborted, Interrupt
|
122
|
+
# stream_out1.abort if stream_out1.respond_to? :abort
|
123
|
+
# stream.abort if stream.respond_to? :abort
|
124
|
+
# stream_out2.abort if stream_out2.respond_to? :abort
|
125
|
+
# Log.medium "Tee aborting #{Misc.fingerprint stream}"
|
126
|
+
# raise $!
|
127
|
+
# rescue Exception
|
128
|
+
# stream_out1.abort if stream_out1.respond_to? :abort
|
129
|
+
# stream.abort if stream.respond_to? :abort
|
130
|
+
# stream_out2.abort if stream_out2.respond_to? :abort
|
131
|
+
# Log.medium "Tee exception #{Misc.fingerprint stream}"
|
132
|
+
# raise $!
|
133
|
+
# end
|
134
|
+
# end
|
135
|
+
|
136
|
+
# ConcurrentStream.setup stream_out1, :threads => splitter_thread
|
137
|
+
# ConcurrentStream.setup stream_out2, :threads => splitter_thread
|
138
|
+
|
139
|
+
# [stream_out1, stream_out2]
|
140
|
+
#end
|
141
|
+
|
142
|
+
def self.tee_stream_thread_multiple(stream, num = 2)
|
143
|
+
in_pipes = []
|
144
|
+
out_pipes = []
|
145
|
+
num.times do
|
146
|
+
sout, sin = Misc.pipe
|
147
|
+
in_pipes << sin
|
148
|
+
out_pipes << sout
|
149
|
+
end
|
93
150
|
|
94
151
|
splitter_thread = Thread.new(Thread.current) do |parent|
|
95
152
|
begin
|
96
153
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
skip2 = true
|
112
|
-
end unless skip2
|
113
|
-
|
154
|
+
skip = [false] * num
|
155
|
+
begin
|
156
|
+
while block = stream.readpartial(BLOCK_SIZE)
|
157
|
+
|
158
|
+
in_pipes.each_with_index do |sin,i|
|
159
|
+
begin
|
160
|
+
sin.write block;
|
161
|
+
rescue IOError
|
162
|
+
Log.medium("Tee stream #{i} #{Misc.fingerprint stream} IOError: #{$!.message}");
|
163
|
+
skip[i] = true
|
164
|
+
end unless skip[i]
|
165
|
+
end
|
166
|
+
end
|
167
|
+
rescue IOError
|
114
168
|
end
|
115
169
|
|
116
|
-
|
170
|
+
in_pipes.each do |sin|
|
171
|
+
sin.close unless sin.closed?
|
172
|
+
end
|
117
173
|
stream.join if stream.respond_to? :join
|
118
|
-
stream_in2.close unless stream_in2.closed?
|
119
174
|
rescue Aborted, Interrupt
|
120
|
-
stream_out1.abort if stream_out1.respond_to? :abort
|
121
175
|
stream.abort if stream.respond_to? :abort
|
122
|
-
|
176
|
+
out_pipes.each do |sout|
|
177
|
+
sout.abort if sout.respond_to? :abort
|
178
|
+
end
|
123
179
|
Log.medium "Tee aborting #{Misc.fingerprint stream}"
|
124
180
|
raise $!
|
125
181
|
rescue Exception
|
126
|
-
stream_out1.abort if stream_out1.respond_to? :abort
|
127
182
|
stream.abort if stream.respond_to? :abort
|
128
|
-
|
183
|
+
out_pipes.each do |sout|
|
184
|
+
sout.abort if sout.respond_to? :abort
|
185
|
+
end
|
129
186
|
Log.medium "Tee exception #{Misc.fingerprint stream}"
|
130
187
|
raise $!
|
131
188
|
end
|
132
189
|
end
|
133
190
|
|
134
|
-
|
135
|
-
|
191
|
+
out_pipes.each do |sout|
|
192
|
+
ConcurrentStream.setup sout, :threads => splitter_thread
|
193
|
+
end
|
136
194
|
|
137
|
-
|
138
|
-
|
195
|
+
out_pipes
|
196
|
+
end
|
139
197
|
|
140
|
-
|
141
|
-
|
198
|
+
def self.tee_stream_thread(stream)
|
199
|
+
tee_stream_thread_multiple(stream, 2)
|
200
|
+
end
|
142
201
|
|
143
|
-
|
202
|
+
def self.dup_stream_multiple(stream, num = 1)
|
203
|
+
stream_dup = stream.dup
|
204
|
+
if stream.respond_to? :annotate
|
205
|
+
stream.annotate stream_dup
|
206
|
+
stream.clear
|
207
|
+
end
|
208
|
+
tee1, *rest = Misc.tee_stream stream_dup, num + 1
|
209
|
+
stream.reopen(tee1)
|
210
|
+
rest
|
144
211
|
end
|
145
212
|
|
146
213
|
class << self
|
147
|
-
alias tee_stream
|
214
|
+
alias tee_stream tee_stream_thread_multiple
|
215
|
+
alias dup_stream dup_stream_multiple
|
148
216
|
end
|
149
217
|
|
150
218
|
def self.read_full_stream(io)
|
151
219
|
str = ""
|
152
220
|
begin
|
153
|
-
while block = io.read(
|
221
|
+
while block = io.read(BLOCK_SIZE)
|
154
222
|
str << block
|
155
223
|
end
|
156
224
|
io.join if io.respond_to? :join
|
@@ -182,10 +250,17 @@ module Misc
|
|
182
250
|
begin
|
183
251
|
into = into.find if Path === into
|
184
252
|
into = Open.open(into, :mode => 'w') if String === into
|
185
|
-
into.sync
|
186
|
-
|
187
|
-
|
253
|
+
into.sync = true if IO === into
|
254
|
+
into_close = false unless into.respond_to? :close
|
255
|
+
io.sync = true
|
256
|
+
|
257
|
+
begin
|
258
|
+
while block = io.readpartial(BLOCK_SIZE)
|
259
|
+
into << block if into
|
260
|
+
end
|
261
|
+
rescue EOFError
|
188
262
|
end
|
263
|
+
|
189
264
|
io.join if io.respond_to? :join
|
190
265
|
io.close unless io.closed?
|
191
266
|
into.close if into and into_close and not into.closed?
|
@@ -256,7 +331,7 @@ module Misc
|
|
256
331
|
|
257
332
|
Open.write(tmp_path) do |f|
|
258
333
|
f.sync = true
|
259
|
-
while block = content.read(
|
334
|
+
while block = content.read(BLOCK_SIZE)
|
260
335
|
f.write block
|
261
336
|
end
|
262
337
|
end
|
@@ -340,7 +415,7 @@ module Misc
|
|
340
415
|
|
341
416
|
sorted = CMD.cmd("env LC_ALL=C sort #{cmd_args || ""}", :in => line_stream, :pipe => true)
|
342
417
|
|
343
|
-
while block = sorted.read(
|
418
|
+
while block = sorted.read(BLOCK_SIZE)
|
344
419
|
sin.write block
|
345
420
|
end
|
346
421
|
|
@@ -476,17 +551,6 @@ module Misc
|
|
476
551
|
end
|
477
552
|
end
|
478
553
|
|
479
|
-
def self.dup_stream(stream)
|
480
|
-
stream_dup = stream.dup
|
481
|
-
if stream.respond_to? :annotate
|
482
|
-
stream.annotate stream_dup
|
483
|
-
stream.clear
|
484
|
-
end
|
485
|
-
tee1, tee2 = Misc.tee_stream stream_dup
|
486
|
-
stream.reopen(tee1)
|
487
|
-
tee2
|
488
|
-
end
|
489
|
-
|
490
554
|
def self.save_stream(file, stream)
|
491
555
|
out, save = Misc.tee_stream stream
|
492
556
|
out.filename = file
|