rbbt-util 5.22.1 → 5.22.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/monitor.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +6 -3
- data/lib/rbbt/util/concurrency/processes.rb +186 -56
- data/lib/rbbt/util/concurrency/processes/socket.rb +2 -2
- data/lib/rbbt/util/concurrency/processes/worker.rb +111 -43
- data/lib/rbbt/util/log/progress/util.rb +3 -0
- data/lib/rbbt/util/misc/concurrent_stream.rb +1 -1
- data/lib/rbbt/util/misc/exceptions.rb +3 -1
- data/lib/rbbt/util/misc/inspect.rb +8 -0
- data/lib/rbbt/util/open.rb +4 -1
- data/lib/rbbt/util/semaphore.rb +6 -3
- data/lib/rbbt/workflow/accessor.rb +13 -1
- data/lib/rbbt/workflow/step.rb +0 -2
- data/lib/rbbt/workflow/step/run.rb +3 -2
- data/share/rbbt_commands/system/status +2 -2
- data/share/rbbt_commands/workflow/prov +3 -3
- data/test/rbbt/util/concurrency/test_processes.rb +92 -4
- metadata +2 -3
- data/lib/rbbt/util/concurrency/processes/socket_old.rb +0 -144
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a706bb947413cbe8267aac3c21f7d23668313123
|
4
|
+
data.tar.gz: 835cc701d39015fbdb0379ff483c38513e77a134
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7db876d745f2dfc5dbefb8798145a29f3ef32962a31f1940bc611589fd61a9feb918616cfa1f45c769b2d67767f9eb5a9b487b2f1d35fa845c0bb645d3915f6
|
7
|
+
data.tar.gz: 68fe15aa3feaab425c9d90424f4bdba20206f5f10a541ace9c7a8aef6d956b4c980566feb1253501999d53a58d58bd1d3d91c0b5ec624bae5e333889cdfe24e4
|
data/lib/rbbt/monitor.rb
CHANGED
@@ -146,7 +146,8 @@ module Rbbt
|
|
146
146
|
task = File.basename(taskdir)
|
147
147
|
next if tasks and not tasks.include? task
|
148
148
|
|
149
|
-
cmd = "find -L '#{ taskdir }/' -not \\( -path \"#{taskdir}/*.files\" -prune \\) -not -
|
149
|
+
cmd = "find -L '#{ taskdir }/' -not \\( -path \"#{taskdir}/*.files\" -prune \\) -not -name '*.pid' -not -name '*.notify' -not -name '\\.*' -not -type d 2>/dev/null"
|
150
|
+
|
150
151
|
files = CMD.cmd(cmd, :pipe => true)
|
151
152
|
TSV.traverse files, :type => :array, :into => jobs, :_bar => "Finding jobs in #{ taskdir }" do |file|
|
152
153
|
_files << file
|
@@ -393,7 +393,9 @@ module TSV
|
|
393
393
|
raise $!
|
394
394
|
ensure
|
395
395
|
q.clean
|
396
|
-
|
396
|
+
if bar
|
397
|
+
Log::ProgressBar.remove_bar(bar, error)
|
398
|
+
end
|
397
399
|
end
|
398
400
|
end
|
399
401
|
|
@@ -595,8 +597,9 @@ module TSV
|
|
595
597
|
if into
|
596
598
|
bar = Misc.process_options options, :bar
|
597
599
|
|
598
|
-
options[:join] = Proc.new do
|
599
|
-
|
600
|
+
options[:join] = Proc.new do |error|
|
601
|
+
error = false if error.nil?
|
602
|
+
Log::ProgressBar.remove_bar(bar, error)
|
600
603
|
end if bar
|
601
604
|
|
602
605
|
options[:callback] = Proc.new do |e|
|
@@ -12,6 +12,7 @@ class RbbtProcessQueue
|
|
12
12
|
@respawn = reswpan
|
13
13
|
@offset = offset
|
14
14
|
@queue = RbbtProcessSocket.new
|
15
|
+
@process_mutex = Mutex.new
|
15
16
|
end
|
16
17
|
|
17
18
|
attr_accessor :callback, :callback_queue, :callback_thread
|
@@ -25,7 +26,7 @@ class RbbtProcessQueue
|
|
25
26
|
@callback_thread = Thread.new(Thread.current) do |parent|
|
26
27
|
begin
|
27
28
|
loop do
|
28
|
-
p = @callback_queue.pop
|
29
|
+
p = @callback_queue.pop unless @callback_queue.cleaned
|
29
30
|
|
30
31
|
if Exception === p or (Array === p and Exception === p.first)
|
31
32
|
e = Array === p ? p.first : p
|
@@ -39,14 +40,14 @@ class RbbtProcessQueue
|
|
39
40
|
@callback.call p
|
40
41
|
end
|
41
42
|
end
|
43
|
+
rescue ClosedStream
|
42
44
|
rescue Aborted
|
43
45
|
Log.warn "Callback thread aborted"
|
44
|
-
|
46
|
+
self._abort
|
45
47
|
raise $!
|
46
|
-
rescue ClosedStream
|
47
48
|
rescue Exception
|
48
49
|
Log.warn "Exception captured in callback: #{$!.message}"
|
49
|
-
|
50
|
+
self._abort
|
50
51
|
raise $!
|
51
52
|
ensure
|
52
53
|
|
@@ -59,61 +60,160 @@ class RbbtProcessQueue
|
|
59
60
|
end
|
60
61
|
|
61
62
|
def init(&block)
|
62
|
-
|
63
|
-
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &block)
|
64
|
-
end
|
65
|
-
@queue.close_read
|
63
|
+
@init_block = block
|
66
64
|
|
67
|
-
@
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
65
|
+
@master_pid = Process.fork do
|
66
|
+
if @callback_queue
|
67
|
+
Misc.purge_pipes(@queue.swrite,@queue.sread,@callback_queue.swrite, @callback_queue.sread)
|
68
|
+
else
|
69
|
+
Misc.purge_pipes(@queue.swrite,@queue.sread)
|
70
|
+
end
|
71
|
+
|
72
|
+
@total = num_processes
|
73
|
+
@count = 0
|
74
|
+
@processes = []
|
75
|
+
@close_up = false
|
76
|
+
|
77
|
+
|
78
|
+
Signal.trap(:INT) do
|
79
|
+
@close_up = true
|
80
|
+
@manager_thread.raise TryAgain
|
81
|
+
end
|
82
|
+
|
83
|
+
@manager_thread = Thread.new do
|
84
|
+
while true
|
77
85
|
begin
|
78
|
-
|
79
|
-
|
86
|
+
begin
|
87
|
+
sleep 10
|
88
|
+
rescue TryAgain
|
89
|
+
end
|
90
|
+
|
91
|
+
@process_mutex.synchronize do
|
92
|
+
if @close_up
|
93
|
+
@total.times do
|
94
|
+
@queue.push ClosedStream.new unless @queue.cleaned
|
95
|
+
end unless @processes.empty?
|
96
|
+
@count = 0
|
97
|
+
end
|
98
|
+
while @count > 0
|
99
|
+
@count -= 1
|
100
|
+
@total += 1
|
101
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &@init_block)
|
102
|
+
Log.low "Added process #{@processes.last.pid} to #{Process.pid} (#{@processes.length})"
|
103
|
+
end
|
104
|
+
|
105
|
+
while @count < 0
|
106
|
+
@count += 1
|
107
|
+
next unless @processes.length > 1
|
108
|
+
first = @processes.shift
|
109
|
+
first.stop
|
110
|
+
Log.low "Removed process #{first.pid} from #{Process.pid} (#{@processes.length})"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
rescue TryAgain
|
114
|
+
retry
|
115
|
+
rescue Aborted
|
116
|
+
Log.low "Closing manager thread"
|
117
|
+
raise Aborted
|
118
|
+
rescue Exception
|
119
|
+
Log.exception $!
|
120
|
+
raise Exception
|
80
121
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
@
|
86
|
-
|
87
|
-
|
88
|
-
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
Signal.trap(:USR1) do
|
126
|
+
@count += 1
|
127
|
+
@manager_thread.raise TryAgain
|
128
|
+
end
|
129
|
+
|
130
|
+
Signal.trap(:USR2) do
|
131
|
+
@count -= 1
|
132
|
+
@manager_thread.raise TryAgain
|
133
|
+
end
|
134
|
+
|
135
|
+
|
136
|
+
@callback_queue.close_read if @callback_queue
|
137
|
+
|
138
|
+
num_processes.times do |i|
|
139
|
+
@process_mutex.synchronize do
|
140
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &@init_block)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
@monitor_thread = Thread.new do
|
145
|
+
begin
|
146
|
+
while @processes.any?
|
147
|
+
@processes[0].join
|
148
|
+
@processes.shift
|
89
149
|
end
|
90
|
-
|
91
|
-
|
92
|
-
|
150
|
+
rescue Aborted
|
151
|
+
Log.warn "Aborting process monitor"
|
152
|
+
@processes.each{|p| p.abort_and_join}
|
153
|
+
@processes.clear
|
154
|
+
|
155
|
+
@callback_thread.kill if @callback_thread && @callback_thread.alive?
|
156
|
+
@manager_thread.kill if @manager_thread.alive?
|
157
|
+
rescue Exception
|
158
|
+
Log.warn "Process monitor exception: #{$!.message}"
|
159
|
+
@processes.each{|p| p.abort_and_join}
|
160
|
+
@processes.clear
|
161
|
+
|
162
|
+
@callback_thread.kill if @callback_thread && @callback_thread.alive?
|
163
|
+
@manager_thread.kill if @manager_thread.alive?
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
Signal.trap(20) do
|
168
|
+
begin
|
169
|
+
@monitor_thread.raise Aborted.new
|
170
|
+
rescue Exception
|
171
|
+
Log.exception $!
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
begin
|
176
|
+
@monitor_thread.join
|
177
|
+
rescue Exception
|
178
|
+
Log.exception $!
|
93
179
|
end
|
180
|
+
|
181
|
+
Kernel.exit! 0
|
94
182
|
end
|
183
|
+
|
184
|
+
Log.info "Cpu process (#{num_processes}) started with master: #{@master_pid}"
|
185
|
+
|
186
|
+
@queue.close_read
|
187
|
+
end
|
188
|
+
|
189
|
+
def add_process
|
190
|
+
Process.kill :USR1, @master_pid
|
191
|
+
end
|
192
|
+
|
193
|
+
def remove_process
|
194
|
+
Process.kill :USR2, @master_pid
|
95
195
|
end
|
96
196
|
|
97
197
|
def close_callback
|
198
|
+
return unless @callback_thread.alive?
|
98
199
|
begin
|
99
|
-
|
200
|
+
t = Thread.new do
|
201
|
+
@callback_queue.push ClosedStream.new
|
202
|
+
end
|
100
203
|
rescue Exception
|
101
204
|
Log.warn "Error closing callback: #{$!.message}"
|
102
205
|
end
|
103
206
|
@callback_thread.join #if @callback_thread.alive?
|
207
|
+
t.join
|
104
208
|
end
|
105
209
|
|
106
|
-
def
|
107
|
-
|
108
|
-
@processes.length.times do
|
109
|
-
@queue.push ClosedStream.new
|
110
|
-
end if @process_monitor.alive?
|
111
|
-
rescue Exception
|
112
|
-
end
|
113
|
-
|
210
|
+
def _join
|
211
|
+
error = true
|
114
212
|
begin
|
115
|
-
@
|
116
|
-
|
213
|
+
pid, status = Process.waitpid2 @master_pid
|
214
|
+
error = false if status.success?
|
215
|
+
raise ProcessFailed if error
|
216
|
+
rescue Errno::ECHILD
|
117
217
|
rescue Aborted
|
118
218
|
Log.error "Aborted joining queue"
|
119
219
|
raise $!
|
@@ -121,34 +221,64 @@ class RbbtProcessQueue
|
|
121
221
|
Log.error "Exception joining queue: #{$!.message}"
|
122
222
|
raise $!
|
123
223
|
ensure
|
124
|
-
|
224
|
+
if @join
|
225
|
+
if @join.arity == 1
|
226
|
+
@join.call(error)
|
227
|
+
else
|
228
|
+
@join.call
|
229
|
+
end
|
230
|
+
end
|
125
231
|
end
|
126
232
|
|
127
|
-
@join.call if @join
|
128
233
|
end
|
129
234
|
|
130
|
-
def
|
131
|
-
|
132
|
-
|
133
|
-
|
235
|
+
def join
|
236
|
+
begin
|
237
|
+
Process.kill :INT, @master_pid
|
238
|
+
rescue Errno::ECHILD, Errno::ESRCH
|
239
|
+
Log.debug "Cannot kill #{@master_pid}: #{$!.message}"
|
240
|
+
end
|
241
|
+
|
242
|
+
begin
|
243
|
+
_join
|
244
|
+
ensure
|
245
|
+
close_callback if @callback
|
246
|
+
@queue.swrite.close unless @queue.swrite.closed?
|
134
247
|
end
|
248
|
+
@callback_thread.join if @callback_thread
|
249
|
+
self.clean
|
250
|
+
end
|
135
251
|
|
136
|
-
|
137
|
-
|
252
|
+
def _abort
|
253
|
+
begin
|
254
|
+
Process.kill 20, @master_pid
|
255
|
+
rescue Errno::ECHILD, Errno::ESRCH
|
256
|
+
Log.debug "Cannot kill #{@master_pid}: #{$!.message}"
|
257
|
+
end
|
258
|
+
|
259
|
+
begin
|
260
|
+
_join
|
261
|
+
rescue ProcessFailed
|
262
|
+
end
|
138
263
|
end
|
139
264
|
|
140
265
|
def abort
|
266
|
+
_abort
|
267
|
+
(@callback_thread.raise(Aborted.new); @callback_thread.join) if @callback_thread and @callback_thread.alive?
|
268
|
+
raise Aborted.new
|
269
|
+
end
|
270
|
+
|
271
|
+
def clean
|
141
272
|
begin
|
142
|
-
|
143
|
-
|
273
|
+
self.abort if Misc.pid_exists?(@master_pid)
|
274
|
+
|
144
275
|
ensure
|
145
|
-
|
146
|
-
|
147
|
-
rescue ProcessFailed
|
148
|
-
end
|
276
|
+
@queue.clean if @queue
|
277
|
+
@callback_queue.clean if @callback_queue
|
149
278
|
end
|
150
279
|
end
|
151
280
|
|
281
|
+
|
152
282
|
def process(*e)
|
153
283
|
begin
|
154
284
|
@queue.push e
|
@@ -3,12 +3,11 @@ require 'rbbt/util/semaphore'
|
|
3
3
|
class RbbtProcessQueue
|
4
4
|
class RbbtProcessSocket
|
5
5
|
|
6
|
-
attr_accessor :sread, :swrite, :write_sem, :read_sem
|
6
|
+
attr_accessor :sread, :swrite, :write_sem, :read_sem, :cleaned
|
7
7
|
def initialize(serializer = nil)
|
8
8
|
@sread, @swrite = Misc.pipe
|
9
9
|
|
10
10
|
@serializer = serializer || Marshal
|
11
|
-
|
12
11
|
|
13
12
|
@key = "/" << rand(1000000000).to_s << '.' << Process.pid.to_s;
|
14
13
|
@write_sem = @key + '.in'
|
@@ -19,6 +18,7 @@ class RbbtProcessQueue
|
|
19
18
|
end
|
20
19
|
|
21
20
|
def clean
|
21
|
+
@cleaned = true
|
22
22
|
@sread.close unless @sread.closed?
|
23
23
|
@swrite.close unless @swrite.closed?
|
24
24
|
Log.debug "Destroying socket semaphores: #{[@key] * ", "}"
|
@@ -12,48 +12,63 @@ class RbbtProcessQueue
|
|
12
12
|
|
13
13
|
def run
|
14
14
|
begin
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
begin
|
16
|
+
Signal.trap(:INT){
|
17
|
+
Kernel.exit! -1
|
18
|
+
}
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
@respawn = false
|
21
|
+
Signal.trap(:USR1){
|
22
|
+
@respawn = true
|
23
|
+
}
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
@stop = false
|
26
|
+
Signal.trap(:USR2){
|
27
|
+
@stop = true
|
28
|
+
}
|
28
29
|
|
30
|
+
@abort = false
|
31
|
+
Signal.trap(20){
|
32
|
+
@abort = true
|
33
|
+
raise Aborted
|
34
|
+
}
|
29
35
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
36
|
+
loop do
|
37
|
+
p = @queue.pop
|
38
|
+
next if p.nil?
|
39
|
+
raise p if Exception === p
|
40
|
+
raise p.first if Array === p and Exception === p.first
|
41
|
+
begin
|
42
|
+
res = @block.call *p
|
43
|
+
@callback_queue.push res if @callback_queue
|
44
|
+
rescue Respawn
|
45
|
+
@callback_queue.push $!.payload
|
46
|
+
raise $!
|
47
|
+
end
|
48
|
+
raise Respawn if @respawn
|
49
|
+
if @stop
|
50
|
+
Log.high "Worker #{Process.pid} leaving"
|
51
|
+
break
|
52
|
+
end
|
41
53
|
end
|
42
|
-
|
43
|
-
|
54
|
+
Kernel.exit! 0
|
55
|
+
rescue Respawn
|
56
|
+
Kernel.exit! 28
|
57
|
+
rescue ClosedStream
|
58
|
+
rescue Interrupt,Aborted
|
59
|
+
Log.high "Worker #{Process.pid} aborted"
|
60
|
+
rescue SemaphoreInterrupted
|
61
|
+
retry unless @stop
|
62
|
+
Log.high "Worker #{Process.pid} leaving"
|
63
|
+
rescue Exception
|
64
|
+
Log.exception $!
|
65
|
+
@callback_queue.push($!) if @callback_queue
|
66
|
+
Kernel.exit! -1
|
67
|
+
ensure
|
68
|
+
@callback_queue.close_write if @callback_queue
|
44
69
|
end
|
45
|
-
|
46
|
-
|
47
|
-
Kernel.exit! 28
|
48
|
-
rescue ClosedStream
|
49
|
-
rescue Aborted, Interrupt
|
50
|
-
Log.info "Worker #{Process.pid} aborted"
|
51
|
-
rescue Exception
|
52
|
-
Log.exception $!
|
53
|
-
@callback_queue.push($!) if @callback_queue
|
54
|
-
Kernel.exit! -1
|
55
|
-
ensure
|
56
|
-
@callback_queue.close_write if @callback_queue
|
70
|
+
rescue Aborted
|
71
|
+
Log.high "Worker #{Process.pid} aborted"
|
57
72
|
end
|
58
73
|
Kernel.exit! 0
|
59
74
|
end
|
@@ -81,15 +96,16 @@ class RbbtProcessQueue
|
|
81
96
|
begin
|
82
97
|
while true
|
83
98
|
@monitored = true
|
84
|
-
|
85
|
-
|
86
|
-
|
99
|
+
|
100
|
+
current_mem = @current ? Misc.memory_use(@current) : 0
|
101
|
+
if current_mem > memory_cap and not @asked
|
102
|
+
Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current_mem}"
|
87
103
|
RbbtSemaphore.synchronize(@callback_queue.write_sem) do
|
88
104
|
Process.kill "USR1", @current if @current
|
89
105
|
end
|
90
106
|
@asked = true
|
91
107
|
end
|
92
|
-
sleep
|
108
|
+
sleep 2
|
93
109
|
end
|
94
110
|
rescue
|
95
111
|
Log.exception $!
|
@@ -99,6 +115,7 @@ class RbbtProcessQueue
|
|
99
115
|
while ! @monitored
|
100
116
|
sleep 0.1
|
101
117
|
end
|
118
|
+
|
102
119
|
@current = Process.fork do
|
103
120
|
run
|
104
121
|
end
|
@@ -161,14 +178,65 @@ class RbbtProcessQueue
|
|
161
178
|
end
|
162
179
|
|
163
180
|
def join
|
164
|
-
|
165
|
-
|
181
|
+
return unless Misc.pid_exists? @pid
|
182
|
+
begin
|
183
|
+
pid, status = Process.waitpid2 @pid
|
184
|
+
raise ProcessFailed if not status.success?
|
185
|
+
rescue Aborted
|
186
|
+
self.abort
|
187
|
+
raise $!
|
188
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
189
|
+
Log.exception $!
|
190
|
+
rescue ProcessFailed
|
191
|
+
raise $!
|
192
|
+
rescue Exception
|
193
|
+
Log.exception $!
|
194
|
+
raise $!
|
195
|
+
end
|
166
196
|
end
|
167
197
|
|
198
|
+
|
168
199
|
def abort
|
200
|
+
begin
|
201
|
+
Process.kill 20, @pid
|
202
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
203
|
+
rescue Exception
|
204
|
+
Log.exception $!
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
def abort_and_join
|
209
|
+
begin
|
210
|
+
Process.kill 20, @pid
|
211
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
212
|
+
Log.low "Already joined worker #{@pid}"
|
213
|
+
return
|
214
|
+
end
|
215
|
+
|
216
|
+
Misc.insist([0,0.05,0.5,1,2]) do
|
217
|
+
begin
|
218
|
+
pid, status = Process.waitpid2 @pid, Process::WNOHANG
|
219
|
+
raise if status.nil?
|
220
|
+
Log.low "Abort and join of #{@pid}"
|
221
|
+
return
|
222
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
223
|
+
Log.low "Already joined worker #{@pid}"
|
224
|
+
return
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
|
229
|
+
begin
|
230
|
+
Log.low "Forcing abort of #{@pid}"
|
231
|
+
Process.kill 9, @pid
|
232
|
+
pid, status = Process.waitpid2 @pid
|
233
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def stop
|
169
238
|
begin
|
170
239
|
Process.kill :USR2, @pid
|
171
|
-
Process.kill :INT, @pid
|
172
240
|
rescue Errno::ESRCH
|
173
241
|
rescue Exception
|
174
242
|
Log.exception $!
|
@@ -84,7 +84,7 @@ module ConcurrentStream
|
|
84
84
|
if no_fail
|
85
85
|
Log.low "Not failing on exception joining thread in ConcurrenStream: #{filename}"
|
86
86
|
else
|
87
|
-
Log.
|
87
|
+
Log.low "Exception joining thread in ConcurrenStream: #{filename}"
|
88
88
|
raise $!
|
89
89
|
end
|
90
90
|
end
|
@@ -1,13 +1,15 @@
|
|
1
1
|
class RbbtException < StandardError; end
|
2
2
|
class ParameterException < RbbtException; end
|
3
3
|
class FieldNotFoundError < RbbtException;end
|
4
|
-
class TryAgain < RbbtException; end
|
5
4
|
class ClosedStream < RbbtException; end
|
6
5
|
|
7
6
|
class ProcessFailed < RbbtException; end
|
8
7
|
|
9
8
|
class Aborted < StandardError; end
|
10
9
|
|
10
|
+
class TryAgain < StandardError; end
|
11
|
+
class SemaphoreInterrupted < TryAgain; end
|
12
|
+
|
11
13
|
class RemoteServerError < RbbtException; end
|
12
14
|
|
13
15
|
class DependencyError < Aborted
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -472,8 +472,11 @@ module Open
|
|
472
472
|
case
|
473
473
|
when block_given?
|
474
474
|
begin
|
475
|
-
File.open(file, mode)
|
475
|
+
f = File.open(file, mode)
|
476
|
+
begin
|
476
477
|
yield f
|
478
|
+
ensure
|
479
|
+
f.close unless f.closed?
|
477
480
|
end
|
478
481
|
rescue Exception
|
479
482
|
FileUtils.rm file if File.exist? file
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -33,11 +33,13 @@ if continue
|
|
33
33
|
EOF
|
34
34
|
|
35
35
|
builder.c_singleton <<-EOF
|
36
|
-
|
36
|
+
int wait_semaphore(char* name){
|
37
|
+
int ret;
|
37
38
|
sem_t* sem;
|
38
39
|
sem = sem_open(name, 0);
|
39
|
-
sem_wait(sem);
|
40
|
+
ret = sem_wait(sem);
|
40
41
|
sem_close(sem);
|
42
|
+
return(ret);
|
41
43
|
}
|
42
44
|
EOF
|
43
45
|
|
@@ -53,7 +55,8 @@ if continue
|
|
53
55
|
|
54
56
|
SEM_MUTEX = Mutex.new
|
55
57
|
def self.synchronize(sem)
|
56
|
-
RbbtSemaphore.wait_semaphore(sem)
|
58
|
+
ret = RbbtSemaphore.wait_semaphore(sem)
|
59
|
+
raise SemaphoreInterrupted if ret == -1
|
57
60
|
begin
|
58
61
|
yield
|
59
62
|
ensure
|
@@ -50,6 +50,14 @@ class Step
|
|
50
50
|
path.nil? ? nil : path + '.info'
|
51
51
|
end
|
52
52
|
|
53
|
+
def self.tmp_path(path)
|
54
|
+
path = path.find if Path === path
|
55
|
+
path = File.expand_path(path)
|
56
|
+
dir = File.dirname(path)
|
57
|
+
filename = File.basename(path)
|
58
|
+
File.join(dir, '.' << filename)
|
59
|
+
end
|
60
|
+
|
53
61
|
def self.pid_file(path)
|
54
62
|
path.nil? ? nil : path + '.pid'
|
55
63
|
end
|
@@ -447,6 +455,10 @@ class Step
|
|
447
455
|
@files_dir ||= Step.files_dir path
|
448
456
|
end
|
449
457
|
|
458
|
+
def tmp_path
|
459
|
+
@tmp_path ||= Step.tmp_path path
|
460
|
+
end
|
461
|
+
|
450
462
|
def files
|
451
463
|
files = Dir.glob(File.join(files_dir, '**', '*')).reject{|path| File.directory? path}.collect do |path|
|
452
464
|
Misc.path_relative_to(files_dir, path)
|
@@ -898,7 +910,7 @@ module Workflow
|
|
898
910
|
if inputs.length > 0 or dependencies.any?
|
899
911
|
tagged_jobname = case TAG
|
900
912
|
when :hash
|
901
|
-
hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
|
913
|
+
hash_str = Misc.obj2digest({:inputs => Annotated.purge(inputs), :dependencies => dependencies})
|
902
914
|
jobname + '_' << hash_str
|
903
915
|
when :inputs
|
904
916
|
all_inputs = {}
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -266,9 +266,7 @@ class Step
|
|
266
266
|
|
267
267
|
Misc.insist do
|
268
268
|
Open.rm info_file if Open.exists? info_file
|
269
|
-
#Open.rm info_file + '.lock' if Open.exists? info_file + '.lock'
|
270
269
|
Open.rm path if Open.exists? path
|
271
|
-
#Open.rm path + '.lock' if Open.exists? path + '.lock'
|
272
270
|
Open.rm_rf files_dir if Open.exists? files_dir
|
273
271
|
Open.rm pid_file if Open.exists? pid_file
|
274
272
|
end
|
@@ -364,6 +364,9 @@ class Step
|
|
364
364
|
|
365
365
|
set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}
|
366
366
|
|
367
|
+
if result.nil? && File.exists?(self.tmp_path) && ! File.exists?(self.path)
|
368
|
+
FileUtils.mv self.tmp_path, self.path
|
369
|
+
end
|
367
370
|
result
|
368
371
|
end
|
369
372
|
|
@@ -526,7 +529,6 @@ class Step
|
|
526
529
|
stream.abort
|
527
530
|
rescue Aborted, Interrupt
|
528
531
|
Log.medium "Aborting job stream #{stream.inspect} ABORTED RETRY -- #{Log.color :blue, path}"
|
529
|
-
Log.exception $!
|
530
532
|
if doretry
|
531
533
|
doretry = false
|
532
534
|
retry
|
@@ -563,7 +565,6 @@ class Step
|
|
563
565
|
retry
|
564
566
|
end
|
565
567
|
rescue Exception
|
566
|
-
Log.exception $!
|
567
568
|
if doretry
|
568
569
|
doretry = false
|
569
570
|
retry
|
@@ -124,7 +124,7 @@ jobs = Rbbt.job_info workflow, task
|
|
124
124
|
workflows = {}
|
125
125
|
|
126
126
|
TSV.traverse jobs, :_bar => "Checking job status" do |file,info|
|
127
|
-
next unless all
|
127
|
+
next unless all || ! info[:done] || ! File.exist?(file)
|
128
128
|
workflow = info[:workflow]
|
129
129
|
task = info[:task]
|
130
130
|
workflows[workflow] ||= {}
|
@@ -182,7 +182,7 @@ workflows.sort.each do |workflow,tasks|
|
|
182
182
|
value = info[field]
|
183
183
|
next if value.nil?
|
184
184
|
value_str = Misc.fingerprint(value)
|
185
|
-
str << "
|
185
|
+
str << "\t#{Log.color :magenta, field}=#{value_str}"
|
186
186
|
end
|
187
187
|
end
|
188
188
|
end
|
@@ -80,7 +80,7 @@ def report_msg(status, name, path, info = nil)
|
|
80
80
|
value = job_inputs[input]
|
81
81
|
next if value.nil?
|
82
82
|
value_str = Misc.fingerprint(value)
|
83
|
-
str << "
|
83
|
+
str << "\t#{Log.color :magenta, input}=#{value_str}"
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -88,9 +88,9 @@ def report_msg(status, name, path, info = nil)
|
|
88
88
|
$info_fields.each do |field|
|
89
89
|
IndiferentHash.setup(info)
|
90
90
|
value = info[field]
|
91
|
-
next if
|
91
|
+
next if value.nil?
|
92
92
|
value_str = Misc.fingerprint(value)
|
93
|
-
str << "
|
93
|
+
str << "\t#{Log.color :magenta, field}=#{value_str}"
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
@@ -6,6 +6,68 @@ require 'rbbt/util/concurrency/processes'
|
|
6
6
|
|
7
7
|
class TestConcurrencyProcess < Test::Unit::TestCase
|
8
8
|
|
9
|
+
def setup
|
10
|
+
Log.severity = 0
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_process_throttle
|
14
|
+
q = RbbtProcessQueue.new 10
|
15
|
+
|
16
|
+
times = 500
|
17
|
+
|
18
|
+
res = []
|
19
|
+
q.callback do |v|
|
20
|
+
res << v
|
21
|
+
end
|
22
|
+
|
23
|
+
q.init do |i|
|
24
|
+
sleep 0.001
|
25
|
+
Process.pid
|
26
|
+
end
|
27
|
+
|
28
|
+
times.times do |i|
|
29
|
+
q.process i
|
30
|
+
end
|
31
|
+
|
32
|
+
sleep 1 while res.length < times
|
33
|
+
assert_equal 10, res.uniq.length
|
34
|
+
|
35
|
+
q.add_process
|
36
|
+
sleep 0.1
|
37
|
+
q.add_process
|
38
|
+
sleep 0.1
|
39
|
+
q.add_process
|
40
|
+
sleep 0.1
|
41
|
+
|
42
|
+
times.times do |i|
|
43
|
+
q.process i
|
44
|
+
end
|
45
|
+
|
46
|
+
sleep 1 while res.length < times * 2
|
47
|
+
assert_equal 13, res[(times + 20)..-1].uniq.length
|
48
|
+
|
49
|
+
q.remove_process
|
50
|
+
sleep 0.1
|
51
|
+
q.remove_process
|
52
|
+
sleep 0.1
|
53
|
+
q.remove_process
|
54
|
+
sleep 0.1
|
55
|
+
q.remove_process
|
56
|
+
sleep 0.1
|
57
|
+
q.remove_process
|
58
|
+
sleep 0.1
|
59
|
+
|
60
|
+
times.times do |i|
|
61
|
+
q.process i
|
62
|
+
end
|
63
|
+
|
64
|
+
sleep 1 while res.length < times * 3
|
65
|
+
assert_equal 8, res[(2*times + 30)..-1].uniq.length
|
66
|
+
|
67
|
+
q.join
|
68
|
+
assert_equal times * 3, res.length
|
69
|
+
end
|
70
|
+
|
9
71
|
def test_process
|
10
72
|
q = RbbtProcessQueue.new 10
|
11
73
|
|
@@ -20,7 +82,6 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
20
82
|
end
|
21
83
|
|
22
84
|
times = 500
|
23
|
-
t = TSV.setup({"a" => 1}, :type => :single)
|
24
85
|
|
25
86
|
times.times do |i|
|
26
87
|
q.process i
|
@@ -45,6 +106,34 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
45
106
|
end
|
46
107
|
end
|
47
108
|
|
109
|
+
def test_error
|
110
|
+
assert_raise RbbtException do
|
111
|
+
q = RbbtProcessQueue.new 3
|
112
|
+
|
113
|
+
res = []
|
114
|
+
|
115
|
+
q.callback do |v|
|
116
|
+
res << v
|
117
|
+
end
|
118
|
+
|
119
|
+
q.init do |i|
|
120
|
+
raise RbbtException.new "MY ERROR" if i == 300
|
121
|
+
i * 2
|
122
|
+
end
|
123
|
+
|
124
|
+
times = 500
|
125
|
+
|
126
|
+
times.times do |i|
|
127
|
+
q.process i
|
128
|
+
end
|
129
|
+
|
130
|
+
q.join
|
131
|
+
|
132
|
+
assert_equal times, res.length
|
133
|
+
assert_equal [0, 2, 4], res.sort[0..2]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
48
137
|
def test_process_abort
|
49
138
|
assert_raise Aborted do
|
50
139
|
q = RbbtProcessQueue.new 10
|
@@ -60,15 +149,14 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
60
149
|
end
|
61
150
|
|
62
151
|
times = 500
|
63
|
-
t = TSV.setup({"a" => 1}, :type => :single)
|
64
152
|
|
65
153
|
times.times do |i|
|
66
154
|
q.process i
|
67
155
|
end
|
68
156
|
|
69
|
-
sleep
|
157
|
+
sleep 1
|
70
158
|
q.clean
|
71
|
-
|
159
|
+
sleep 1
|
72
160
|
|
73
161
|
q.join
|
74
162
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.22.1
|
4
|
+
version: 5.22.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -271,7 +271,6 @@ files:
|
|
271
271
|
- lib/rbbt/util/concurrency.rb
|
272
272
|
- lib/rbbt/util/concurrency/processes.rb
|
273
273
|
- lib/rbbt/util/concurrency/processes/socket.rb
|
274
|
-
- lib/rbbt/util/concurrency/processes/socket_old.rb
|
275
274
|
- lib/rbbt/util/concurrency/processes/worker.rb
|
276
275
|
- lib/rbbt/util/concurrency/threads.rb
|
277
276
|
- lib/rbbt/util/config.rb
|
@@ -1,144 +0,0 @@
|
|
1
|
-
class RbbtProcessQueue
|
2
|
-
class RbbtProcessSocket
|
3
|
-
|
4
|
-
class ClosedSocket < Exception; end
|
5
|
-
|
6
|
-
attr_accessor :sin, :sout, :in_lockfile, :out_lockfile
|
7
|
-
def initialize(lockfile = nil)
|
8
|
-
@sout, @sin = File.pipe
|
9
|
-
|
10
|
-
lockfile ||= TmpFile.tmp_file
|
11
|
-
|
12
|
-
@lockfile = lockfile
|
13
|
-
@in_lockfile = lockfile + '.in'
|
14
|
-
@out_lockfile = lockfile + '.out'
|
15
|
-
raise "in_lockfile exists?" if File.exists? @in_lockfile
|
16
|
-
raise "out_lockfile exists?" if File.exists? @in_lockfile
|
17
|
-
FileUtils.touch @in_lockfile
|
18
|
-
FileUtils.touch @out_lockfile
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.serialize(obj)
|
22
|
-
dump = nil
|
23
|
-
begin
|
24
|
-
case obj
|
25
|
-
when TSV
|
26
|
-
type = "T"
|
27
|
-
info = obj.info
|
28
|
-
info.delete_if{|k,v| v.nil?}
|
29
|
-
dump = Marshal.dump([info, {}.merge(obj)])
|
30
|
-
else
|
31
|
-
type = "M"
|
32
|
-
dump = Marshal.dump(obj)
|
33
|
-
end
|
34
|
-
payload = [type, dump].pack('A1a*')
|
35
|
-
length = payload.bytesize
|
36
|
-
#Log.info "Writing #{ length }"
|
37
|
-
[length].pack('L') << payload
|
38
|
-
rescue Exception
|
39
|
-
Log.error "Serialize error for: #{Misc.fingerprint obj} - #{Misc.fingerprint dump}"
|
40
|
-
raise $!
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.unserialize(str)
|
45
|
-
begin
|
46
|
-
c, dump = str.unpack("A1a*")
|
47
|
-
case c
|
48
|
-
when "M"
|
49
|
-
return Marshal.load(dump)
|
50
|
-
when "T"
|
51
|
-
info, hash = Marshal.load(dump)
|
52
|
-
return TSV.setup(hash, info)
|
53
|
-
end
|
54
|
-
rescue Exception
|
55
|
-
Log.error "Unserialize error for: #{Misc.fingerprint str}"
|
56
|
-
raise $!
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def read_sout(length)
|
61
|
-
str = ""
|
62
|
-
str << sout.readpartial(length-str.length) while str.length < length
|
63
|
-
str
|
64
|
-
end
|
65
|
-
|
66
|
-
def write_sin(str)
|
67
|
-
str_length = str.length
|
68
|
-
wrote = 0
|
69
|
-
wrote += sin.write_nonblock(str[wrote..-1]) while wrote < str_length
|
70
|
-
end
|
71
|
-
|
72
|
-
def push(obj)
|
73
|
-
Filelock in_lockfile do
|
74
|
-
payload = RbbtProcessSocket.serialize(obj)
|
75
|
-
sin << payload
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
|
80
|
-
def pop
|
81
|
-
r = []
|
82
|
-
|
83
|
-
payload = begin
|
84
|
-
Filelock out_lockfile do
|
85
|
-
raise ClosedQueue if sout.eof?
|
86
|
-
r,w,e = IO.select([sout], [], [], 1)
|
87
|
-
raise TryAgain if r.empty?
|
88
|
-
|
89
|
-
first_char = read_sout(4)
|
90
|
-
length = first_char.unpack('L').first
|
91
|
-
#Log.info "Reading #{ length }"
|
92
|
-
read_sout(length)
|
93
|
-
end
|
94
|
-
rescue TryAgain
|
95
|
-
sleep 1
|
96
|
-
end
|
97
|
-
|
98
|
-
RbbtProcessSocket.unserialize(payload)
|
99
|
-
end
|
100
|
-
|
101
|
-
def pop
|
102
|
-
loop do
|
103
|
-
r,w,e = IO.select([sout], [], [], 1)
|
104
|
-
next if r.empty?
|
105
|
-
break
|
106
|
-
end
|
107
|
-
|
108
|
-
first_char = read_sout(4)
|
109
|
-
length = first_char.unpack('L').first
|
110
|
-
#Log.info "Reading #{ length }"
|
111
|
-
read_sout(length)
|
112
|
-
end
|
113
|
-
rescue TryAgain
|
114
|
-
sleep 1
|
115
|
-
end
|
116
|
-
|
117
|
-
RbbtProcessSocket.unserialize(payload)
|
118
|
-
end
|
119
|
-
|
120
|
-
def rest
|
121
|
-
sin.close
|
122
|
-
str = sout.read
|
123
|
-
res = []
|
124
|
-
|
125
|
-
while not str.empty?
|
126
|
-
first_char = str[0]
|
127
|
-
next if first_char.nil?
|
128
|
-
length = first_char.unpack("C").first
|
129
|
-
dump = str[1..length]
|
130
|
-
res << Marshal.load(dump)
|
131
|
-
str = str[length+1..-1]
|
132
|
-
end
|
133
|
-
|
134
|
-
res
|
135
|
-
end
|
136
|
-
|
137
|
-
def clean
|
138
|
-
FileUtils.rm @in_lockfile if File.exists? @in_lockfile
|
139
|
-
FileUtils.rm @out_lockfile if File.exists? @out_lockfile
|
140
|
-
sin.close unless sin.closed?
|
141
|
-
sout.close unless sout.closed?
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|