rbbt-util 5.22.1 → 5.22.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/monitor.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +6 -3
- data/lib/rbbt/util/concurrency/processes.rb +186 -56
- data/lib/rbbt/util/concurrency/processes/socket.rb +2 -2
- data/lib/rbbt/util/concurrency/processes/worker.rb +111 -43
- data/lib/rbbt/util/log/progress/util.rb +3 -0
- data/lib/rbbt/util/misc/concurrent_stream.rb +1 -1
- data/lib/rbbt/util/misc/exceptions.rb +3 -1
- data/lib/rbbt/util/misc/inspect.rb +8 -0
- data/lib/rbbt/util/open.rb +4 -1
- data/lib/rbbt/util/semaphore.rb +6 -3
- data/lib/rbbt/workflow/accessor.rb +13 -1
- data/lib/rbbt/workflow/step.rb +0 -2
- data/lib/rbbt/workflow/step/run.rb +3 -2
- data/share/rbbt_commands/system/status +2 -2
- data/share/rbbt_commands/workflow/prov +3 -3
- data/test/rbbt/util/concurrency/test_processes.rb +92 -4
- metadata +2 -3
- data/lib/rbbt/util/concurrency/processes/socket_old.rb +0 -144
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a706bb947413cbe8267aac3c21f7d23668313123
|
4
|
+
data.tar.gz: 835cc701d39015fbdb0379ff483c38513e77a134
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7db876d745f2dfc5dbefb8798145a29f3ef32962a31f1940bc611589fd61a9feb918616cfa1f45c769b2d67767f9eb5a9b487b2f1d35fa845c0bb645d3915f6
|
7
|
+
data.tar.gz: 68fe15aa3feaab425c9d90424f4bdba20206f5f10a541ace9c7a8aef6d956b4c980566feb1253501999d53a58d58bd1d3d91c0b5ec624bae5e333889cdfe24e4
|
data/lib/rbbt/monitor.rb
CHANGED
@@ -146,7 +146,8 @@ module Rbbt
|
|
146
146
|
task = File.basename(taskdir)
|
147
147
|
next if tasks and not tasks.include? task
|
148
148
|
|
149
|
-
cmd = "find -L '#{ taskdir }/' -not \\( -path \"#{taskdir}/*.files\" -prune \\) -not -
|
149
|
+
cmd = "find -L '#{ taskdir }/' -not \\( -path \"#{taskdir}/*.files\" -prune \\) -not -name '*.pid' -not -name '*.notify' -not -name '\\.*' -not -type d 2>/dev/null"
|
150
|
+
|
150
151
|
files = CMD.cmd(cmd, :pipe => true)
|
151
152
|
TSV.traverse files, :type => :array, :into => jobs, :_bar => "Finding jobs in #{ taskdir }" do |file|
|
152
153
|
_files << file
|
@@ -393,7 +393,9 @@ module TSV
|
|
393
393
|
raise $!
|
394
394
|
ensure
|
395
395
|
q.clean
|
396
|
-
|
396
|
+
if bar
|
397
|
+
Log::ProgressBar.remove_bar(bar, error)
|
398
|
+
end
|
397
399
|
end
|
398
400
|
end
|
399
401
|
|
@@ -595,8 +597,9 @@ module TSV
|
|
595
597
|
if into
|
596
598
|
bar = Misc.process_options options, :bar
|
597
599
|
|
598
|
-
options[:join] = Proc.new do
|
599
|
-
|
600
|
+
options[:join] = Proc.new do |error|
|
601
|
+
error = false if error.nil?
|
602
|
+
Log::ProgressBar.remove_bar(bar, error)
|
600
603
|
end if bar
|
601
604
|
|
602
605
|
options[:callback] = Proc.new do |e|
|
@@ -12,6 +12,7 @@ class RbbtProcessQueue
|
|
12
12
|
@respawn = reswpan
|
13
13
|
@offset = offset
|
14
14
|
@queue = RbbtProcessSocket.new
|
15
|
+
@process_mutex = Mutex.new
|
15
16
|
end
|
16
17
|
|
17
18
|
attr_accessor :callback, :callback_queue, :callback_thread
|
@@ -25,7 +26,7 @@ class RbbtProcessQueue
|
|
25
26
|
@callback_thread = Thread.new(Thread.current) do |parent|
|
26
27
|
begin
|
27
28
|
loop do
|
28
|
-
p = @callback_queue.pop
|
29
|
+
p = @callback_queue.pop unless @callback_queue.cleaned
|
29
30
|
|
30
31
|
if Exception === p or (Array === p and Exception === p.first)
|
31
32
|
e = Array === p ? p.first : p
|
@@ -39,14 +40,14 @@ class RbbtProcessQueue
|
|
39
40
|
@callback.call p
|
40
41
|
end
|
41
42
|
end
|
43
|
+
rescue ClosedStream
|
42
44
|
rescue Aborted
|
43
45
|
Log.warn "Callback thread aborted"
|
44
|
-
|
46
|
+
self._abort
|
45
47
|
raise $!
|
46
|
-
rescue ClosedStream
|
47
48
|
rescue Exception
|
48
49
|
Log.warn "Exception captured in callback: #{$!.message}"
|
49
|
-
|
50
|
+
self._abort
|
50
51
|
raise $!
|
51
52
|
ensure
|
52
53
|
|
@@ -59,61 +60,160 @@ class RbbtProcessQueue
|
|
59
60
|
end
|
60
61
|
|
61
62
|
def init(&block)
|
62
|
-
|
63
|
-
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &block)
|
64
|
-
end
|
65
|
-
@queue.close_read
|
63
|
+
@init_block = block
|
66
64
|
|
67
|
-
@
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
65
|
+
@master_pid = Process.fork do
|
66
|
+
if @callback_queue
|
67
|
+
Misc.purge_pipes(@queue.swrite,@queue.sread,@callback_queue.swrite, @callback_queue.sread)
|
68
|
+
else
|
69
|
+
Misc.purge_pipes(@queue.swrite,@queue.sread)
|
70
|
+
end
|
71
|
+
|
72
|
+
@total = num_processes
|
73
|
+
@count = 0
|
74
|
+
@processes = []
|
75
|
+
@close_up = false
|
76
|
+
|
77
|
+
|
78
|
+
Signal.trap(:INT) do
|
79
|
+
@close_up = true
|
80
|
+
@manager_thread.raise TryAgain
|
81
|
+
end
|
82
|
+
|
83
|
+
@manager_thread = Thread.new do
|
84
|
+
while true
|
77
85
|
begin
|
78
|
-
|
79
|
-
|
86
|
+
begin
|
87
|
+
sleep 10
|
88
|
+
rescue TryAgain
|
89
|
+
end
|
90
|
+
|
91
|
+
@process_mutex.synchronize do
|
92
|
+
if @close_up
|
93
|
+
@total.times do
|
94
|
+
@queue.push ClosedStream.new unless @queue.cleaned
|
95
|
+
end unless @processes.empty?
|
96
|
+
@count = 0
|
97
|
+
end
|
98
|
+
while @count > 0
|
99
|
+
@count -= 1
|
100
|
+
@total += 1
|
101
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &@init_block)
|
102
|
+
Log.low "Added process #{@processes.last.pid} to #{Process.pid} (#{@processes.length})"
|
103
|
+
end
|
104
|
+
|
105
|
+
while @count < 0
|
106
|
+
@count += 1
|
107
|
+
next unless @processes.length > 1
|
108
|
+
first = @processes.shift
|
109
|
+
first.stop
|
110
|
+
Log.low "Removed process #{first.pid} from #{Process.pid} (#{@processes.length})"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
rescue TryAgain
|
114
|
+
retry
|
115
|
+
rescue Aborted
|
116
|
+
Log.low "Closing manager thread"
|
117
|
+
raise Aborted
|
118
|
+
rescue Exception
|
119
|
+
Log.exception $!
|
120
|
+
raise Exception
|
80
121
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
@
|
86
|
-
|
87
|
-
|
88
|
-
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
Signal.trap(:USR1) do
|
126
|
+
@count += 1
|
127
|
+
@manager_thread.raise TryAgain
|
128
|
+
end
|
129
|
+
|
130
|
+
Signal.trap(:USR2) do
|
131
|
+
@count -= 1
|
132
|
+
@manager_thread.raise TryAgain
|
133
|
+
end
|
134
|
+
|
135
|
+
|
136
|
+
@callback_queue.close_read if @callback_queue
|
137
|
+
|
138
|
+
num_processes.times do |i|
|
139
|
+
@process_mutex.synchronize do
|
140
|
+
@processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, @cleanup, @respawn, @offset, &@init_block)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
@monitor_thread = Thread.new do
|
145
|
+
begin
|
146
|
+
while @processes.any?
|
147
|
+
@processes[0].join
|
148
|
+
@processes.shift
|
89
149
|
end
|
90
|
-
|
91
|
-
|
92
|
-
|
150
|
+
rescue Aborted
|
151
|
+
Log.warn "Aborting process monitor"
|
152
|
+
@processes.each{|p| p.abort_and_join}
|
153
|
+
@processes.clear
|
154
|
+
|
155
|
+
@callback_thread.kill if @callback_thread && @callback_thread.alive?
|
156
|
+
@manager_thread.kill if @manager_thread.alive?
|
157
|
+
rescue Exception
|
158
|
+
Log.warn "Process monitor exception: #{$!.message}"
|
159
|
+
@processes.each{|p| p.abort_and_join}
|
160
|
+
@processes.clear
|
161
|
+
|
162
|
+
@callback_thread.kill if @callback_thread && @callback_thread.alive?
|
163
|
+
@manager_thread.kill if @manager_thread.alive?
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
Signal.trap(20) do
|
168
|
+
begin
|
169
|
+
@monitor_thread.raise Aborted.new
|
170
|
+
rescue Exception
|
171
|
+
Log.exception $!
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
begin
|
176
|
+
@monitor_thread.join
|
177
|
+
rescue Exception
|
178
|
+
Log.exception $!
|
93
179
|
end
|
180
|
+
|
181
|
+
Kernel.exit! 0
|
94
182
|
end
|
183
|
+
|
184
|
+
Log.info "Cpu process (#{num_processes}) started with master: #{@master_pid}"
|
185
|
+
|
186
|
+
@queue.close_read
|
187
|
+
end
|
188
|
+
|
189
|
+
def add_process
|
190
|
+
Process.kill :USR1, @master_pid
|
191
|
+
end
|
192
|
+
|
193
|
+
def remove_process
|
194
|
+
Process.kill :USR2, @master_pid
|
95
195
|
end
|
96
196
|
|
97
197
|
def close_callback
|
198
|
+
return unless @callback_thread.alive?
|
98
199
|
begin
|
99
|
-
|
200
|
+
t = Thread.new do
|
201
|
+
@callback_queue.push ClosedStream.new
|
202
|
+
end
|
100
203
|
rescue Exception
|
101
204
|
Log.warn "Error closing callback: #{$!.message}"
|
102
205
|
end
|
103
206
|
@callback_thread.join #if @callback_thread.alive?
|
207
|
+
t.join
|
104
208
|
end
|
105
209
|
|
106
|
-
def
|
107
|
-
|
108
|
-
@processes.length.times do
|
109
|
-
@queue.push ClosedStream.new
|
110
|
-
end if @process_monitor.alive?
|
111
|
-
rescue Exception
|
112
|
-
end
|
113
|
-
|
210
|
+
def _join
|
211
|
+
error = true
|
114
212
|
begin
|
115
|
-
@
|
116
|
-
|
213
|
+
pid, status = Process.waitpid2 @master_pid
|
214
|
+
error = false if status.success?
|
215
|
+
raise ProcessFailed if error
|
216
|
+
rescue Errno::ECHILD
|
117
217
|
rescue Aborted
|
118
218
|
Log.error "Aborted joining queue"
|
119
219
|
raise $!
|
@@ -121,34 +221,64 @@ class RbbtProcessQueue
|
|
121
221
|
Log.error "Exception joining queue: #{$!.message}"
|
122
222
|
raise $!
|
123
223
|
ensure
|
124
|
-
|
224
|
+
if @join
|
225
|
+
if @join.arity == 1
|
226
|
+
@join.call(error)
|
227
|
+
else
|
228
|
+
@join.call
|
229
|
+
end
|
230
|
+
end
|
125
231
|
end
|
126
232
|
|
127
|
-
@join.call if @join
|
128
233
|
end
|
129
234
|
|
130
|
-
def
|
131
|
-
|
132
|
-
|
133
|
-
|
235
|
+
def join
|
236
|
+
begin
|
237
|
+
Process.kill :INT, @master_pid
|
238
|
+
rescue Errno::ECHILD, Errno::ESRCH
|
239
|
+
Log.debug "Cannot kill #{@master_pid}: #{$!.message}"
|
240
|
+
end
|
241
|
+
|
242
|
+
begin
|
243
|
+
_join
|
244
|
+
ensure
|
245
|
+
close_callback if @callback
|
246
|
+
@queue.swrite.close unless @queue.swrite.closed?
|
134
247
|
end
|
248
|
+
@callback_thread.join if @callback_thread
|
249
|
+
self.clean
|
250
|
+
end
|
135
251
|
|
136
|
-
|
137
|
-
|
252
|
+
def _abort
|
253
|
+
begin
|
254
|
+
Process.kill 20, @master_pid
|
255
|
+
rescue Errno::ECHILD, Errno::ESRCH
|
256
|
+
Log.debug "Cannot kill #{@master_pid}: #{$!.message}"
|
257
|
+
end
|
258
|
+
|
259
|
+
begin
|
260
|
+
_join
|
261
|
+
rescue ProcessFailed
|
262
|
+
end
|
138
263
|
end
|
139
264
|
|
140
265
|
def abort
|
266
|
+
_abort
|
267
|
+
(@callback_thread.raise(Aborted.new); @callback_thread.join) if @callback_thread and @callback_thread.alive?
|
268
|
+
raise Aborted.new
|
269
|
+
end
|
270
|
+
|
271
|
+
def clean
|
141
272
|
begin
|
142
|
-
|
143
|
-
|
273
|
+
self.abort if Misc.pid_exists?(@master_pid)
|
274
|
+
|
144
275
|
ensure
|
145
|
-
|
146
|
-
|
147
|
-
rescue ProcessFailed
|
148
|
-
end
|
276
|
+
@queue.clean if @queue
|
277
|
+
@callback_queue.clean if @callback_queue
|
149
278
|
end
|
150
279
|
end
|
151
280
|
|
281
|
+
|
152
282
|
def process(*e)
|
153
283
|
begin
|
154
284
|
@queue.push e
|
@@ -3,12 +3,11 @@ require 'rbbt/util/semaphore'
|
|
3
3
|
class RbbtProcessQueue
|
4
4
|
class RbbtProcessSocket
|
5
5
|
|
6
|
-
attr_accessor :sread, :swrite, :write_sem, :read_sem
|
6
|
+
attr_accessor :sread, :swrite, :write_sem, :read_sem, :cleaned
|
7
7
|
def initialize(serializer = nil)
|
8
8
|
@sread, @swrite = Misc.pipe
|
9
9
|
|
10
10
|
@serializer = serializer || Marshal
|
11
|
-
|
12
11
|
|
13
12
|
@key = "/" << rand(1000000000).to_s << '.' << Process.pid.to_s;
|
14
13
|
@write_sem = @key + '.in'
|
@@ -19,6 +18,7 @@ class RbbtProcessQueue
|
|
19
18
|
end
|
20
19
|
|
21
20
|
def clean
|
21
|
+
@cleaned = true
|
22
22
|
@sread.close unless @sread.closed?
|
23
23
|
@swrite.close unless @swrite.closed?
|
24
24
|
Log.debug "Destroying socket semaphores: #{[@key] * ", "}"
|
@@ -12,48 +12,63 @@ class RbbtProcessQueue
|
|
12
12
|
|
13
13
|
def run
|
14
14
|
begin
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
begin
|
16
|
+
Signal.trap(:INT){
|
17
|
+
Kernel.exit! -1
|
18
|
+
}
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
@respawn = false
|
21
|
+
Signal.trap(:USR1){
|
22
|
+
@respawn = true
|
23
|
+
}
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
@stop = false
|
26
|
+
Signal.trap(:USR2){
|
27
|
+
@stop = true
|
28
|
+
}
|
28
29
|
|
30
|
+
@abort = false
|
31
|
+
Signal.trap(20){
|
32
|
+
@abort = true
|
33
|
+
raise Aborted
|
34
|
+
}
|
29
35
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
36
|
+
loop do
|
37
|
+
p = @queue.pop
|
38
|
+
next if p.nil?
|
39
|
+
raise p if Exception === p
|
40
|
+
raise p.first if Array === p and Exception === p.first
|
41
|
+
begin
|
42
|
+
res = @block.call *p
|
43
|
+
@callback_queue.push res if @callback_queue
|
44
|
+
rescue Respawn
|
45
|
+
@callback_queue.push $!.payload
|
46
|
+
raise $!
|
47
|
+
end
|
48
|
+
raise Respawn if @respawn
|
49
|
+
if @stop
|
50
|
+
Log.high "Worker #{Process.pid} leaving"
|
51
|
+
break
|
52
|
+
end
|
41
53
|
end
|
42
|
-
|
43
|
-
|
54
|
+
Kernel.exit! 0
|
55
|
+
rescue Respawn
|
56
|
+
Kernel.exit! 28
|
57
|
+
rescue ClosedStream
|
58
|
+
rescue Interrupt,Aborted
|
59
|
+
Log.high "Worker #{Process.pid} aborted"
|
60
|
+
rescue SemaphoreInterrupted
|
61
|
+
retry unless @stop
|
62
|
+
Log.high "Worker #{Process.pid} leaving"
|
63
|
+
rescue Exception
|
64
|
+
Log.exception $!
|
65
|
+
@callback_queue.push($!) if @callback_queue
|
66
|
+
Kernel.exit! -1
|
67
|
+
ensure
|
68
|
+
@callback_queue.close_write if @callback_queue
|
44
69
|
end
|
45
|
-
|
46
|
-
|
47
|
-
Kernel.exit! 28
|
48
|
-
rescue ClosedStream
|
49
|
-
rescue Aborted, Interrupt
|
50
|
-
Log.info "Worker #{Process.pid} aborted"
|
51
|
-
rescue Exception
|
52
|
-
Log.exception $!
|
53
|
-
@callback_queue.push($!) if @callback_queue
|
54
|
-
Kernel.exit! -1
|
55
|
-
ensure
|
56
|
-
@callback_queue.close_write if @callback_queue
|
70
|
+
rescue Aborted
|
71
|
+
Log.high "Worker #{Process.pid} aborted"
|
57
72
|
end
|
58
73
|
Kernel.exit! 0
|
59
74
|
end
|
@@ -81,15 +96,16 @@ class RbbtProcessQueue
|
|
81
96
|
begin
|
82
97
|
while true
|
83
98
|
@monitored = true
|
84
|
-
|
85
|
-
|
86
|
-
|
99
|
+
|
100
|
+
current_mem = @current ? Misc.memory_use(@current) : 0
|
101
|
+
if current_mem > memory_cap and not @asked
|
102
|
+
Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current_mem}"
|
87
103
|
RbbtSemaphore.synchronize(@callback_queue.write_sem) do
|
88
104
|
Process.kill "USR1", @current if @current
|
89
105
|
end
|
90
106
|
@asked = true
|
91
107
|
end
|
92
|
-
sleep
|
108
|
+
sleep 2
|
93
109
|
end
|
94
110
|
rescue
|
95
111
|
Log.exception $!
|
@@ -99,6 +115,7 @@ class RbbtProcessQueue
|
|
99
115
|
while ! @monitored
|
100
116
|
sleep 0.1
|
101
117
|
end
|
118
|
+
|
102
119
|
@current = Process.fork do
|
103
120
|
run
|
104
121
|
end
|
@@ -161,14 +178,65 @@ class RbbtProcessQueue
|
|
161
178
|
end
|
162
179
|
|
163
180
|
def join
|
164
|
-
|
165
|
-
|
181
|
+
return unless Misc.pid_exists? @pid
|
182
|
+
begin
|
183
|
+
pid, status = Process.waitpid2 @pid
|
184
|
+
raise ProcessFailed if not status.success?
|
185
|
+
rescue Aborted
|
186
|
+
self.abort
|
187
|
+
raise $!
|
188
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
189
|
+
Log.exception $!
|
190
|
+
rescue ProcessFailed
|
191
|
+
raise $!
|
192
|
+
rescue Exception
|
193
|
+
Log.exception $!
|
194
|
+
raise $!
|
195
|
+
end
|
166
196
|
end
|
167
197
|
|
198
|
+
|
168
199
|
def abort
|
200
|
+
begin
|
201
|
+
Process.kill 20, @pid
|
202
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
203
|
+
rescue Exception
|
204
|
+
Log.exception $!
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
def abort_and_join
|
209
|
+
begin
|
210
|
+
Process.kill 20, @pid
|
211
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
212
|
+
Log.low "Already joined worker #{@pid}"
|
213
|
+
return
|
214
|
+
end
|
215
|
+
|
216
|
+
Misc.insist([0,0.05,0.5,1,2]) do
|
217
|
+
begin
|
218
|
+
pid, status = Process.waitpid2 @pid, Process::WNOHANG
|
219
|
+
raise if status.nil?
|
220
|
+
Log.low "Abort and join of #{@pid}"
|
221
|
+
return
|
222
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
223
|
+
Log.low "Already joined worker #{@pid}"
|
224
|
+
return
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
|
229
|
+
begin
|
230
|
+
Log.low "Forcing abort of #{@pid}"
|
231
|
+
Process.kill 9, @pid
|
232
|
+
pid, status = Process.waitpid2 @pid
|
233
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def stop
|
169
238
|
begin
|
170
239
|
Process.kill :USR2, @pid
|
171
|
-
Process.kill :INT, @pid
|
172
240
|
rescue Errno::ESRCH
|
173
241
|
rescue Exception
|
174
242
|
Log.exception $!
|
@@ -84,7 +84,7 @@ module ConcurrentStream
|
|
84
84
|
if no_fail
|
85
85
|
Log.low "Not failing on exception joining thread in ConcurrenStream: #{filename}"
|
86
86
|
else
|
87
|
-
Log.
|
87
|
+
Log.low "Exception joining thread in ConcurrenStream: #{filename}"
|
88
88
|
raise $!
|
89
89
|
end
|
90
90
|
end
|
@@ -1,13 +1,15 @@
|
|
1
1
|
class RbbtException < StandardError; end
|
2
2
|
class ParameterException < RbbtException; end
|
3
3
|
class FieldNotFoundError < RbbtException;end
|
4
|
-
class TryAgain < RbbtException; end
|
5
4
|
class ClosedStream < RbbtException; end
|
6
5
|
|
7
6
|
class ProcessFailed < RbbtException; end
|
8
7
|
|
9
8
|
class Aborted < StandardError; end
|
10
9
|
|
10
|
+
class TryAgain < StandardError; end
|
11
|
+
class SemaphoreInterrupted < TryAgain; end
|
12
|
+
|
11
13
|
class RemoteServerError < RbbtException; end
|
12
14
|
|
13
15
|
class DependencyError < Aborted
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -472,8 +472,11 @@ module Open
|
|
472
472
|
case
|
473
473
|
when block_given?
|
474
474
|
begin
|
475
|
-
File.open(file, mode)
|
475
|
+
f = File.open(file, mode)
|
476
|
+
begin
|
476
477
|
yield f
|
478
|
+
ensure
|
479
|
+
f.close unless f.closed?
|
477
480
|
end
|
478
481
|
rescue Exception
|
479
482
|
FileUtils.rm file if File.exist? file
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -33,11 +33,13 @@ if continue
|
|
33
33
|
EOF
|
34
34
|
|
35
35
|
builder.c_singleton <<-EOF
|
36
|
-
|
36
|
+
int wait_semaphore(char* name){
|
37
|
+
int ret;
|
37
38
|
sem_t* sem;
|
38
39
|
sem = sem_open(name, 0);
|
39
|
-
sem_wait(sem);
|
40
|
+
ret = sem_wait(sem);
|
40
41
|
sem_close(sem);
|
42
|
+
return(ret);
|
41
43
|
}
|
42
44
|
EOF
|
43
45
|
|
@@ -53,7 +55,8 @@ if continue
|
|
53
55
|
|
54
56
|
SEM_MUTEX = Mutex.new
|
55
57
|
def self.synchronize(sem)
|
56
|
-
RbbtSemaphore.wait_semaphore(sem)
|
58
|
+
ret = RbbtSemaphore.wait_semaphore(sem)
|
59
|
+
raise SemaphoreInterrupted if ret == -1
|
57
60
|
begin
|
58
61
|
yield
|
59
62
|
ensure
|
@@ -50,6 +50,14 @@ class Step
|
|
50
50
|
path.nil? ? nil : path + '.info'
|
51
51
|
end
|
52
52
|
|
53
|
+
def self.tmp_path(path)
|
54
|
+
path = path.find if Path === path
|
55
|
+
path = File.expand_path(path)
|
56
|
+
dir = File.dirname(path)
|
57
|
+
filename = File.basename(path)
|
58
|
+
File.join(dir, '.' << filename)
|
59
|
+
end
|
60
|
+
|
53
61
|
def self.pid_file(path)
|
54
62
|
path.nil? ? nil : path + '.pid'
|
55
63
|
end
|
@@ -447,6 +455,10 @@ class Step
|
|
447
455
|
@files_dir ||= Step.files_dir path
|
448
456
|
end
|
449
457
|
|
458
|
+
def tmp_path
|
459
|
+
@tmp_path ||= Step.tmp_path path
|
460
|
+
end
|
461
|
+
|
450
462
|
def files
|
451
463
|
files = Dir.glob(File.join(files_dir, '**', '*')).reject{|path| File.directory? path}.collect do |path|
|
452
464
|
Misc.path_relative_to(files_dir, path)
|
@@ -898,7 +910,7 @@ module Workflow
|
|
898
910
|
if inputs.length > 0 or dependencies.any?
|
899
911
|
tagged_jobname = case TAG
|
900
912
|
when :hash
|
901
|
-
hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
|
913
|
+
hash_str = Misc.obj2digest({:inputs => Annotated.purge(inputs), :dependencies => dependencies})
|
902
914
|
jobname + '_' << hash_str
|
903
915
|
when :inputs
|
904
916
|
all_inputs = {}
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -266,9 +266,7 @@ class Step
|
|
266
266
|
|
267
267
|
Misc.insist do
|
268
268
|
Open.rm info_file if Open.exists? info_file
|
269
|
-
#Open.rm info_file + '.lock' if Open.exists? info_file + '.lock'
|
270
269
|
Open.rm path if Open.exists? path
|
271
|
-
#Open.rm path + '.lock' if Open.exists? path + '.lock'
|
272
270
|
Open.rm_rf files_dir if Open.exists? files_dir
|
273
271
|
Open.rm pid_file if Open.exists? pid_file
|
274
272
|
end
|
@@ -364,6 +364,9 @@ class Step
|
|
364
364
|
|
365
365
|
set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}
|
366
366
|
|
367
|
+
if result.nil? && File.exists?(self.tmp_path) && ! File.exists?(self.path)
|
368
|
+
FileUtils.mv self.tmp_path, self.path
|
369
|
+
end
|
367
370
|
result
|
368
371
|
end
|
369
372
|
|
@@ -526,7 +529,6 @@ class Step
|
|
526
529
|
stream.abort
|
527
530
|
rescue Aborted, Interrupt
|
528
531
|
Log.medium "Aborting job stream #{stream.inspect} ABORTED RETRY -- #{Log.color :blue, path}"
|
529
|
-
Log.exception $!
|
530
532
|
if doretry
|
531
533
|
doretry = false
|
532
534
|
retry
|
@@ -563,7 +565,6 @@ class Step
|
|
563
565
|
retry
|
564
566
|
end
|
565
567
|
rescue Exception
|
566
|
-
Log.exception $!
|
567
568
|
if doretry
|
568
569
|
doretry = false
|
569
570
|
retry
|
@@ -124,7 +124,7 @@ jobs = Rbbt.job_info workflow, task
|
|
124
124
|
workflows = {}
|
125
125
|
|
126
126
|
TSV.traverse jobs, :_bar => "Checking job status" do |file,info|
|
127
|
-
next unless all
|
127
|
+
next unless all || ! info[:done] || ! File.exist?(file)
|
128
128
|
workflow = info[:workflow]
|
129
129
|
task = info[:task]
|
130
130
|
workflows[workflow] ||= {}
|
@@ -182,7 +182,7 @@ workflows.sort.each do |workflow,tasks|
|
|
182
182
|
value = info[field]
|
183
183
|
next if value.nil?
|
184
184
|
value_str = Misc.fingerprint(value)
|
185
|
-
str << "
|
185
|
+
str << "\t#{Log.color :magenta, field}=#{value_str}"
|
186
186
|
end
|
187
187
|
end
|
188
188
|
end
|
@@ -80,7 +80,7 @@ def report_msg(status, name, path, info = nil)
|
|
80
80
|
value = job_inputs[input]
|
81
81
|
next if value.nil?
|
82
82
|
value_str = Misc.fingerprint(value)
|
83
|
-
str << "
|
83
|
+
str << "\t#{Log.color :magenta, input}=#{value_str}"
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -88,9 +88,9 @@ def report_msg(status, name, path, info = nil)
|
|
88
88
|
$info_fields.each do |field|
|
89
89
|
IndiferentHash.setup(info)
|
90
90
|
value = info[field]
|
91
|
-
next if
|
91
|
+
next if value.nil?
|
92
92
|
value_str = Misc.fingerprint(value)
|
93
|
-
str << "
|
93
|
+
str << "\t#{Log.color :magenta, field}=#{value_str}"
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
@@ -6,6 +6,68 @@ require 'rbbt/util/concurrency/processes'
|
|
6
6
|
|
7
7
|
class TestConcurrencyProcess < Test::Unit::TestCase
|
8
8
|
|
9
|
+
def setup
|
10
|
+
Log.severity = 0
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_process_throttle
|
14
|
+
q = RbbtProcessQueue.new 10
|
15
|
+
|
16
|
+
times = 500
|
17
|
+
|
18
|
+
res = []
|
19
|
+
q.callback do |v|
|
20
|
+
res << v
|
21
|
+
end
|
22
|
+
|
23
|
+
q.init do |i|
|
24
|
+
sleep 0.001
|
25
|
+
Process.pid
|
26
|
+
end
|
27
|
+
|
28
|
+
times.times do |i|
|
29
|
+
q.process i
|
30
|
+
end
|
31
|
+
|
32
|
+
sleep 1 while res.length < times
|
33
|
+
assert_equal 10, res.uniq.length
|
34
|
+
|
35
|
+
q.add_process
|
36
|
+
sleep 0.1
|
37
|
+
q.add_process
|
38
|
+
sleep 0.1
|
39
|
+
q.add_process
|
40
|
+
sleep 0.1
|
41
|
+
|
42
|
+
times.times do |i|
|
43
|
+
q.process i
|
44
|
+
end
|
45
|
+
|
46
|
+
sleep 1 while res.length < times * 2
|
47
|
+
assert_equal 13, res[(times + 20)..-1].uniq.length
|
48
|
+
|
49
|
+
q.remove_process
|
50
|
+
sleep 0.1
|
51
|
+
q.remove_process
|
52
|
+
sleep 0.1
|
53
|
+
q.remove_process
|
54
|
+
sleep 0.1
|
55
|
+
q.remove_process
|
56
|
+
sleep 0.1
|
57
|
+
q.remove_process
|
58
|
+
sleep 0.1
|
59
|
+
|
60
|
+
times.times do |i|
|
61
|
+
q.process i
|
62
|
+
end
|
63
|
+
|
64
|
+
sleep 1 while res.length < times * 3
|
65
|
+
assert_equal 8, res[(2*times + 30)..-1].uniq.length
|
66
|
+
|
67
|
+
q.join
|
68
|
+
assert_equal times * 3, res.length
|
69
|
+
end
|
70
|
+
|
9
71
|
def test_process
|
10
72
|
q = RbbtProcessQueue.new 10
|
11
73
|
|
@@ -20,7 +82,6 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
20
82
|
end
|
21
83
|
|
22
84
|
times = 500
|
23
|
-
t = TSV.setup({"a" => 1}, :type => :single)
|
24
85
|
|
25
86
|
times.times do |i|
|
26
87
|
q.process i
|
@@ -45,6 +106,34 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
45
106
|
end
|
46
107
|
end
|
47
108
|
|
109
|
+
def test_error
|
110
|
+
assert_raise RbbtException do
|
111
|
+
q = RbbtProcessQueue.new 3
|
112
|
+
|
113
|
+
res = []
|
114
|
+
|
115
|
+
q.callback do |v|
|
116
|
+
res << v
|
117
|
+
end
|
118
|
+
|
119
|
+
q.init do |i|
|
120
|
+
raise RbbtException.new "MY ERROR" if i == 300
|
121
|
+
i * 2
|
122
|
+
end
|
123
|
+
|
124
|
+
times = 500
|
125
|
+
|
126
|
+
times.times do |i|
|
127
|
+
q.process i
|
128
|
+
end
|
129
|
+
|
130
|
+
q.join
|
131
|
+
|
132
|
+
assert_equal times, res.length
|
133
|
+
assert_equal [0, 2, 4], res.sort[0..2]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
48
137
|
def test_process_abort
|
49
138
|
assert_raise Aborted do
|
50
139
|
q = RbbtProcessQueue.new 10
|
@@ -60,15 +149,14 @@ class TestConcurrencyProcess < Test::Unit::TestCase
|
|
60
149
|
end
|
61
150
|
|
62
151
|
times = 500
|
63
|
-
t = TSV.setup({"a" => 1}, :type => :single)
|
64
152
|
|
65
153
|
times.times do |i|
|
66
154
|
q.process i
|
67
155
|
end
|
68
156
|
|
69
|
-
sleep
|
157
|
+
sleep 1
|
70
158
|
q.clean
|
71
|
-
|
159
|
+
sleep 1
|
72
160
|
|
73
161
|
q.join
|
74
162
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.22.
|
4
|
+
version: 5.22.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -271,7 +271,6 @@ files:
|
|
271
271
|
- lib/rbbt/util/concurrency.rb
|
272
272
|
- lib/rbbt/util/concurrency/processes.rb
|
273
273
|
- lib/rbbt/util/concurrency/processes/socket.rb
|
274
|
-
- lib/rbbt/util/concurrency/processes/socket_old.rb
|
275
274
|
- lib/rbbt/util/concurrency/processes/worker.rb
|
276
275
|
- lib/rbbt/util/concurrency/threads.rb
|
277
276
|
- lib/rbbt/util/config.rb
|
@@ -1,144 +0,0 @@
|
|
1
|
-
class RbbtProcessQueue
|
2
|
-
class RbbtProcessSocket
|
3
|
-
|
4
|
-
class ClosedSocket < Exception; end
|
5
|
-
|
6
|
-
attr_accessor :sin, :sout, :in_lockfile, :out_lockfile
|
7
|
-
def initialize(lockfile = nil)
|
8
|
-
@sout, @sin = File.pipe
|
9
|
-
|
10
|
-
lockfile ||= TmpFile.tmp_file
|
11
|
-
|
12
|
-
@lockfile = lockfile
|
13
|
-
@in_lockfile = lockfile + '.in'
|
14
|
-
@out_lockfile = lockfile + '.out'
|
15
|
-
raise "in_lockfile exists?" if File.exists? @in_lockfile
|
16
|
-
raise "out_lockfile exists?" if File.exists? @in_lockfile
|
17
|
-
FileUtils.touch @in_lockfile
|
18
|
-
FileUtils.touch @out_lockfile
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.serialize(obj)
|
22
|
-
dump = nil
|
23
|
-
begin
|
24
|
-
case obj
|
25
|
-
when TSV
|
26
|
-
type = "T"
|
27
|
-
info = obj.info
|
28
|
-
info.delete_if{|k,v| v.nil?}
|
29
|
-
dump = Marshal.dump([info, {}.merge(obj)])
|
30
|
-
else
|
31
|
-
type = "M"
|
32
|
-
dump = Marshal.dump(obj)
|
33
|
-
end
|
34
|
-
payload = [type, dump].pack('A1a*')
|
35
|
-
length = payload.bytesize
|
36
|
-
#Log.info "Writing #{ length }"
|
37
|
-
[length].pack('L') << payload
|
38
|
-
rescue Exception
|
39
|
-
Log.error "Serialize error for: #{Misc.fingerprint obj} - #{Misc.fingerprint dump}"
|
40
|
-
raise $!
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.unserialize(str)
|
45
|
-
begin
|
46
|
-
c, dump = str.unpack("A1a*")
|
47
|
-
case c
|
48
|
-
when "M"
|
49
|
-
return Marshal.load(dump)
|
50
|
-
when "T"
|
51
|
-
info, hash = Marshal.load(dump)
|
52
|
-
return TSV.setup(hash, info)
|
53
|
-
end
|
54
|
-
rescue Exception
|
55
|
-
Log.error "Unserialize error for: #{Misc.fingerprint str}"
|
56
|
-
raise $!
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def read_sout(length)
|
61
|
-
str = ""
|
62
|
-
str << sout.readpartial(length-str.length) while str.length < length
|
63
|
-
str
|
64
|
-
end
|
65
|
-
|
66
|
-
def write_sin(str)
|
67
|
-
str_length = str.length
|
68
|
-
wrote = 0
|
69
|
-
wrote += sin.write_nonblock(str[wrote..-1]) while wrote < str_length
|
70
|
-
end
|
71
|
-
|
72
|
-
def push(obj)
|
73
|
-
Filelock in_lockfile do
|
74
|
-
payload = RbbtProcessSocket.serialize(obj)
|
75
|
-
sin << payload
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
|
80
|
-
def pop
|
81
|
-
r = []
|
82
|
-
|
83
|
-
payload = begin
|
84
|
-
Filelock out_lockfile do
|
85
|
-
raise ClosedQueue if sout.eof?
|
86
|
-
r,w,e = IO.select([sout], [], [], 1)
|
87
|
-
raise TryAgain if r.empty?
|
88
|
-
|
89
|
-
first_char = read_sout(4)
|
90
|
-
length = first_char.unpack('L').first
|
91
|
-
#Log.info "Reading #{ length }"
|
92
|
-
read_sout(length)
|
93
|
-
end
|
94
|
-
rescue TryAgain
|
95
|
-
sleep 1
|
96
|
-
end
|
97
|
-
|
98
|
-
RbbtProcessSocket.unserialize(payload)
|
99
|
-
end
|
100
|
-
|
101
|
-
def pop
|
102
|
-
loop do
|
103
|
-
r,w,e = IO.select([sout], [], [], 1)
|
104
|
-
next if r.empty?
|
105
|
-
break
|
106
|
-
end
|
107
|
-
|
108
|
-
first_char = read_sout(4)
|
109
|
-
length = first_char.unpack('L').first
|
110
|
-
#Log.info "Reading #{ length }"
|
111
|
-
read_sout(length)
|
112
|
-
end
|
113
|
-
rescue TryAgain
|
114
|
-
sleep 1
|
115
|
-
end
|
116
|
-
|
117
|
-
RbbtProcessSocket.unserialize(payload)
|
118
|
-
end
|
119
|
-
|
120
|
-
def rest
|
121
|
-
sin.close
|
122
|
-
str = sout.read
|
123
|
-
res = []
|
124
|
-
|
125
|
-
while not str.empty?
|
126
|
-
first_char = str[0]
|
127
|
-
next if first_char.nil?
|
128
|
-
length = first_char.unpack("C").first
|
129
|
-
dump = str[1..length]
|
130
|
-
res << Marshal.load(dump)
|
131
|
-
str = str[length+1..-1]
|
132
|
-
end
|
133
|
-
|
134
|
-
res
|
135
|
-
end
|
136
|
-
|
137
|
-
def clean
|
138
|
-
FileUtils.rm @in_lockfile if File.exists? @in_lockfile
|
139
|
-
FileUtils.rm @out_lockfile if File.exists? @out_lockfile
|
140
|
-
sin.close unless sin.closed?
|
141
|
-
sout.close unless sout.closed?
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|