rbbt-util 5.10.1 → 5.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -26,6 +26,8 @@ module Workflow
26
26
  end
27
27
 
28
28
  def dep(*dependency_list, &block)
29
+ @dependency_list ||= []
30
+ @dependencies ||= []
29
31
  dependency_list << block if block_given?
30
32
  dependencies.concat dependency_list
31
33
  end
@@ -102,7 +102,8 @@ class Step
102
102
  def join
103
103
  case @result
104
104
  when IO
105
- while @result.read 2048; end
105
+ while @result.read 2048; Thread.pass end unless @result.closed? or @result.eof?
106
+ @result.join if @result.respond_to? :join
106
107
  @result = nil
107
108
  end
108
109
 
@@ -126,7 +127,7 @@ class Step
126
127
 
127
128
  def run(no_load = false)
128
129
 
129
- result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => false do
130
+ result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load ? :stream : false do
130
131
  if Step === Step.log_relay_step and not self == Step.log_relay_step
131
132
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
132
133
  end
@@ -190,12 +191,18 @@ class Step
190
191
  end
191
192
 
192
193
  case result
193
- when IO, TSV::Dumper
194
- log :streaming, "#{Log.color :magenta, "Streaming task result"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
195
- class << result
196
- attr_accessor :callback
194
+ when IO, StringIO
195
+ log :streaming, "#{Log.color :magenta, "Streaming task result IO"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
196
+ ConcurrentStream.setup result do
197
+ eee 1
198
+ set_info :done, (done_time = Time.now)
199
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
200
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
197
201
  end
198
- result.callback = Proc.new do
202
+ when TSV::Dumper
203
+ log :streaming, "#{Log.color :magenta, "Streaming task result TSV::Dumper"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
204
+ ConcurrentStream.setup result.stream do
205
+ set_info :done, (done_time = Time.now)
199
206
  set_info :done, (done_time = Time.now)
200
207
  set_info :time_elapsed, (time_elapsed = done_time - start_time)
201
208
  log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
@@ -256,6 +263,7 @@ class Step
256
263
  exit -1
257
264
  end
258
265
  set_info :pid, nil
266
+ exit 0
259
267
  ensure
260
268
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
261
269
  end
@@ -297,7 +305,7 @@ class Step
297
305
  end
298
306
 
299
307
  def load
300
- return prepare_result @result, @task.result_description if @result
308
+ return prepare_result @result, @task.result_description if @result and not @path == @result
301
309
  join if not done?
302
310
  return Persist.load_file(@path, @task.result_type) if @path.exists?
303
311
  exec
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Monitor throughput
10
+
11
+ $ rbbt tsv throughput
12
+
13
+ Display summary information. Works with Tokyocabinet HDB and BDB as well.
14
+
15
+ -h--help Help
16
+ EOF
17
+
18
+ SOPT.usage if options[:help]
19
+
20
+ last = start = Time.now
21
+ count = 0
22
+ max = 0
23
+ avg = 0
24
+ all = []
25
+ scale = 5
26
+ while line = STDIN.gets
27
+ count += 1
28
+ if Time.now - last >= 1.0 / scale
29
+ Log.clear_line
30
+ puts "#{ count*scale } per second. Max #{max*scale}. Average #{avg*scale}"
31
+ last = Time.now
32
+ max = count > max ? count : max
33
+ all << count
34
+ avg = Misc.mean(all).to_i if all.length > 3
35
+ count = 0
36
+ end
37
+ end
38
+
39
+ all << count
40
+
41
+ puts "Total #{Misc.sum(all).to_i} in #{(Time.now - start).to_i} seconds -- #{(Misc.sum(all).to_f / (Time.now - start)).to_i } per second. Max #{max*scale}. Average #{avg*scale}"
@@ -53,20 +53,25 @@ options[:app].split(/,|\s/).collect do |app|
53
53
  app_dirs[app] = report
54
54
  end if options[:app]
55
55
 
56
- puts <<EOF
57
56
 
58
- #{ report_jobs workflow_dir}
59
- #{ app_dirs.collect{|d,report| report } * "\n" }
57
+ puts <<EOF
58
+ # LOCKS
59
+ #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read }
60
+ EOF
60
61
 
62
+ puts <<EOF
61
63
  # LOCKED TSV
62
64
  #{ Rbbt.var.tsv_open_locks.glob('*').collect{|f| "- " << File.basename(f) } * "\n" }
65
+ EOF
63
66
 
64
- # LOCKS
65
- #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read }
66
-
67
+ puts <<EOF
67
68
  # PERSIST
68
69
  #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.persist'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.persist'").read }
69
70
  EOF
70
71
 
71
-
72
+ puts <<EOF
73
+ # JOBS
74
+ #{ report_jobs workflow_dir}
75
+ #{ app_dirs.collect{|d,report| report } * "\n" }
76
+ EOF
72
77
 
@@ -404,10 +404,11 @@ when (defined?(WorkflowRESTClient) and WorkflowRESTClient::RemoteStep)
404
404
  when Step
405
405
  if IO === res.result
406
406
  io = res.result
407
- while line = io.gets do
408
- out.puts line
409
- end
410
- io.close
407
+ Thread.pass while IO.select([io]).nil?
408
+ while block = io.read(2048) do
409
+ out.write block
410
+ end unless io.closed?
411
+ io.join if io.respond_to? :join
411
412
  else
412
413
  res.join
413
414
  out.puts Open.read(res.path) if File.exists? res.path
@@ -12,7 +12,7 @@ end
12
12
 
13
13
  class TestPersist < Test::Unit::TestCase
14
14
 
15
- def test_array_persist
15
+ def _test_array_persist
16
16
  TmpFile.with_file do |tmp|
17
17
  10.times do
18
18
  assert_equal ["1", "2"],(Persist.persist("Test", :array, :file => tmp) do
@@ -50,7 +50,6 @@ class TestPersist < Test::Unit::TestCase
50
50
  dumper.add key, key + " - 2"
51
51
  end
52
52
  dumper.close
53
- Thread.exit
54
53
  end
55
54
  dumper
56
55
  end
@@ -48,15 +48,8 @@ class TestTSVParallelThrough < Test::Unit::TestCase
48
48
  def test_traverse_stream
49
49
  require 'rbbt/sources/organism'
50
50
 
51
- head = 100
51
+ head = 1000
52
52
 
53
- tsv = Organism.identifiers("Hsa").open
54
- res = {}
55
- TSV.traverse tsv, :head => head do |k,v|
56
- res[k] = v
57
- end
58
-
59
- assert_equal head, res.keys.compact.sort.length
60
53
  tsv = Organism.identifiers("Hsa").open
61
54
  res = {}
62
55
  TSV.traverse tsv, :head => head, :into => res do |k,v|
@@ -69,7 +62,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
69
62
  def test_traverse_stream_cpus
70
63
  require 'rbbt/sources/organism'
71
64
 
72
- head = 100
65
+ head = 1000
73
66
 
74
67
  tsv = Organism.identifiers("Hsa")
75
68
  res = {}
@@ -83,7 +76,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
83
76
  def test_traverse_stream_keys
84
77
  require 'rbbt/sources/organism'
85
78
 
86
- head = 100
79
+ head = 1000
87
80
 
88
81
  tsv = Organism.identifiers("Hsa").open
89
82
  res = []
@@ -92,7 +85,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
92
85
  res << v
93
86
  end
94
87
 
95
- assert_equal res, Organism.identifiers("Hsa").tsv(:head => 100).keys
88
+ assert_equal res, Organism.identifiers("Hsa").tsv(:head => head).keys
96
89
 
97
90
  tsv = Organism.identifiers("Hsa").open
98
91
  res = []
@@ -101,7 +94,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
101
94
  v
102
95
  end
103
96
 
104
- assert_equal res, Organism.identifiers("Hsa").tsv(:head => 100).keys
97
+ assert_equal res.sort, Organism.identifiers("Hsa").tsv(:head => head).keys.sort
105
98
  end
106
99
 
107
100
  def test_traverse_array
@@ -165,7 +158,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
165
158
  def test_traverse_benchmark
166
159
  require 'rbbt/sources/organism'
167
160
 
168
- head = 8_000
161
+ head = 2_000
169
162
 
170
163
  tsv = Organism.identifiers("Hsa").open
171
164
  Misc.benchmark do
@@ -182,6 +175,153 @@ class TestTSVParallelThrough < Test::Unit::TestCase
182
175
  [k,v]
183
176
  end
184
177
  end
178
+ end
179
+
180
+ def test_traverse_into_dumper
181
+ require 'rbbt/sources/organism'
182
+
183
+ head = 2_000
184
+
185
+ stream = Organism.identifiers("Hsa").open
186
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
187
+ dumper.init
188
+ TSV.traverse stream, :head => head, :into => dumper do |k,v|
189
+ k = k.first
190
+ [k,v]
191
+ end
192
+
193
+ res = TSV.open(dumper.stream)
194
+
195
+ assert_equal head, res.size
196
+ end
197
+
198
+ def test_traverse_into_dumper_threads
199
+ require 'rbbt/sources/organism'
200
+
201
+ head = 2_000
202
+ threads = 10
203
+
204
+ stream = Organism.identifiers("Hsa").open
205
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
206
+ dumper.init
207
+
208
+ TSV.traverse stream, :threads => threads, :head => head, :into => dumper do |k,v|
209
+ k = k.first
210
+ [k,v]
211
+ end
212
+
213
+ res = TSV.open(StringIO.new(dumper.stream.read))
214
+
215
+ assert_equal head, res.size
216
+ end
217
+
218
+ def test_traverse_into_dumper_cpus
219
+ require 'rbbt/sources/organism'
220
+
221
+ head = 2_000
222
+ cpus = 10
223
+
224
+ stream = Organism.identifiers("Hsa").open
225
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
226
+ dumper.init
227
+ TSV.traverse stream, :cpus => cpus, :head => head, :into => dumper do |k,v|
228
+ k = k.first
229
+ [k,v]
230
+ end
231
+
232
+ res = TSV.open(dumper.stream)
233
+
234
+ assert_equal head, res.size
235
+ end
236
+
237
+ #{{{ TRAVERSE DUMPER
238
+
239
+ def test_traverse_dumper
240
+ require 'rbbt/sources/organism'
185
241
 
242
+ head = 2_000
243
+
244
+ tsv = TSV::Parser.new Organism.identifiers("Hsa").open, :head => head
245
+ dumper = TSV::Dumper.new tsv.options
246
+
247
+ TSV.traverse tsv, :head => head, :into => dumper do |k,v|
248
+ k = k.first
249
+ [k,v]
250
+ end
251
+
252
+ res = {}
253
+ TSV.traverse dumper.stream, :into => res do |k,v|
254
+ [k, v.length]
255
+ end
256
+
257
+ assert_equal head, res.size
258
+ end
259
+
260
+ def test_traverse_dumper_threads
261
+ require 'rbbt/sources/organism'
262
+
263
+ head = 2_000
264
+ threads = 3
265
+
266
+ tsv = TSV::Parser.new Organism.identifiers("Hsa").open, :head => head
267
+ dumper = TSV::Dumper.new tsv.options
268
+
269
+ TSV.traverse tsv, :head => head, :threads => threads, :into => dumper do |k,v|
270
+ k = k.first
271
+ [k,v]
272
+ end
273
+
274
+ res = {}
275
+ TSV.traverse dumper.stream, :threads => threads, :into => res do |k,v|
276
+ [k, v.length]
277
+ end
278
+
279
+ assert_equal head, res.size
280
+ end
281
+
282
+ def test_traverse_dumper_cpus
283
+ require 'rbbt/sources/organism'
284
+
285
+ head = 10_000
286
+ cpus = nil
287
+
288
+ stream = Organism.identifiers("Hsa").open
289
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
290
+
291
+ TSV.traverse stream, :head => head, :cpus => cpus, :into => dumper do |k,v|
292
+ k = k.first
293
+ [k,v]
294
+ end
295
+
296
+ res = {}
297
+ TSV.traverse dumper.stream, :cpus => cpus, :into => res do |k,v|
298
+ [k, v.length]
299
+ end
300
+
301
+ assert_equal head, res.size
302
+ end
303
+
304
+ def test_traverse_dumper_cpus_exception
305
+ require 'rbbt/sources/organism'
306
+
307
+ head = 2_000
308
+ cpus = 2
309
+
310
+ stream = Organism.identifiers("Hsa/jun2011").open
311
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa/jun2011").tsv_options
312
+
313
+ assert_raise do
314
+ begin
315
+ TSV.traverse stream, :head => head, :cpus => cpus, :into => dumper do |k,v|
316
+ k = k.first
317
+ raise "STOP" if rand(100) < 1
318
+ [k,v]
319
+ end
320
+ dumper.stream.read
321
+ rescue Exception
322
+ Log.exception $!
323
+ raise $!
324
+ end
325
+ end
186
326
  end
187
327
  end
@@ -27,7 +27,6 @@ class TestConcurrencyProcess < Test::Unit::TestCase
27
27
  end
28
28
 
29
29
  q.join
30
- q.clean
31
30
 
32
31
  assert_equal times, res.length
33
32
  assert_equal [0, 2, 4], res.sort[0..2]
@@ -115,17 +115,154 @@ eum fugiat quo voluptas nulla pariatur?"
115
115
  assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
116
116
  end
117
117
 
118
- # def test_pdf2text_example
119
- # assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
120
- # end
121
- #
122
- # def test_pdf2text_EPAR
123
- # assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
124
- # end
125
- #
126
- # def test_pdf2text_wrong
127
- # assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
128
- # end
118
+ def test_pipe
119
+ sout, sin = Misc.pipe
120
+ assert_equal 1, Misc::OPEN_PIPE_IN.length
121
+ sin.close
122
+ assert sout.eof?
123
+ Misc.purge_pipes
124
+ assert_equal 0, Misc::OPEN_PIPE_IN.length
125
+ end
126
+
127
+ def test_pipe_fork
128
+ sout, sin = Misc.pipe
129
+ pid = Process.fork do
130
+ Misc.purge_pipes(sin)
131
+ sleep 2
132
+ sin.close
133
+ end
134
+ sin.close
135
+ assert sout.eof?
136
+ Process.kill :INT, pid
137
+ end
138
+
139
+ def test_open_pipe
140
+ t = 5
141
+ stream = Misc.open_pipe do |sin|
142
+ t.times do |i|
143
+ sleep 0.5
144
+ sin.puts "LINE #{ i }"
145
+ end
146
+ end
147
+
148
+ time = Time.now
149
+ lines = []
150
+ while line = stream.gets
151
+ lines << line.strip
152
+ end
153
+ time_spent = Time.new - time
154
+
155
+ assert time_spent >= t * 0.5
156
+ assert time_spent <= (t+1) * 0.5
157
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
158
+ end
159
+
160
+ def test_open_pipe_fork
161
+ t = 5
162
+ stream = Misc.open_pipe(true) do |sin|
163
+ t.times do |i|
164
+ sleep 0.5
165
+ sin.puts "LINE #{ i }"
166
+ end
167
+ end
168
+
169
+ time = Time.now
170
+ lines = []
171
+ while line = stream.gets
172
+ lines << line.strip
173
+ end
174
+ time_spent = Time.new - time
175
+
176
+ assert time_spent >= t * 0.5
177
+ assert time_spent <= (t+1) * 0.5
178
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
179
+ end
180
+
181
+ def test_open_pipe_fork_cascade
182
+ t = 500
183
+ sleep_time = 2.0 / t
184
+ time = Time.now
185
+
186
+ stream1 = Misc.open_pipe(true) do |sin|
187
+ t.times do |i|
188
+ sleep sleep_time
189
+ sin.puts "LINE #{ i }"
190
+ end
191
+ end
192
+
193
+ stream2 = Misc.open_pipe(true) do |sin|
194
+ while line = stream1.gets
195
+ sin.puts line.strip.reverse
196
+ end
197
+ end
198
+
199
+ stream3 = Misc.open_pipe(true) do |sin|
200
+ while line = stream2.gets
201
+ sin.puts line.downcase
202
+ end
203
+ end
204
+
205
+ lines = []
206
+ while line = stream3.gets
207
+ lines << line.strip
208
+ end
209
+
210
+ time_spent = Time.new - time
211
+
212
+ assert time_spent >= t * sleep_time
213
+ assert time_spent <= t * 1.2 * sleep_time
214
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".reverse.downcase}, lines
215
+ end
216
+
217
+ def test_tee_stream
218
+ t = 500
219
+ sleep_time = 2.0 / t
220
+ time = Time.now
221
+
222
+ stream1 = Misc.open_pipe(true) do |sin|
223
+ t.times do |i|
224
+ sleep sleep_time
225
+ sin.puts "LINE #{ i }"
226
+ end
227
+ end
228
+
229
+ stream2, stream3 = Misc.tee_stream stream1
230
+
231
+ stream4 = Misc.open_pipe(true) do |sin|
232
+ while line = stream2.gets
233
+ sin.puts line.strip.reverse
234
+ end
235
+ end
236
+
237
+ stream5 = Misc.open_pipe(true) do |sin|
238
+ while line = stream3.gets
239
+ sin.puts line.strip.downcase
240
+ end
241
+ end
242
+
243
+ lines1 = []
244
+ th1 = Thread.new do
245
+ while line = stream4.gets
246
+ lines1 << line.strip
247
+ end
248
+ end
249
+
250
+ lines2 = []
251
+ th2 = Thread.new do
252
+ while line = stream5.gets
253
+ lines2 << line.strip
254
+ end
255
+ end
256
+ th1.join and th2.join
257
+
258
+ time_spent = Time.new - time
259
+
260
+ assert time_spent >= t * sleep_time
261
+ assert time_spent <= t * 1.2 * sleep_time
262
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".reverse}, lines1
263
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".downcase}, lines2
264
+ end
265
+
129
266
 
130
267
  def test_string2hash
131
268
  assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
@@ -294,26 +431,6 @@ eum fugiat quo voluptas nulla pariatur?"
294
431
  assert_equal "COSMIC", Misc.camel_case("COSMIC")
295
432
  end
296
433
 
297
- def test_pipe
298
- t = 5
299
- stream = Misc.open_pipe do |sin|
300
- t.times do |i|
301
- sleep 0.5
302
- sin.puts "LINE #{ i }"
303
- end
304
- end
305
-
306
- time = Time.now
307
- lines = []
308
- while line = stream.gets
309
- lines << line.strip
310
- end
311
- time_spent = Time.new - time
312
-
313
- assert time_spent >= t * 0.5
314
- assert time_spent <= (t+1) * 0.5
315
- assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
316
- end
317
434
 
318
435
  def __test_lock_fd
319
436
  require 'rbbt/workflow'