rbbt-util 5.10.1 → 5.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,8 @@ module Workflow
26
26
  end
27
27
 
28
28
  def dep(*dependency_list, &block)
29
+ @dependency_list ||= []
30
+ @dependencies ||= []
29
31
  dependency_list << block if block_given?
30
32
  dependencies.concat dependency_list
31
33
  end
@@ -102,7 +102,8 @@ class Step
102
102
  def join
103
103
  case @result
104
104
  when IO
105
- while @result.read 2048; end
105
+ while @result.read 2048; Thread.pass end unless @result.closed? or @result.eof?
106
+ @result.join if @result.respond_to? :join
106
107
  @result = nil
107
108
  end
108
109
 
@@ -126,7 +127,7 @@ class Step
126
127
 
127
128
  def run(no_load = false)
128
129
 
129
- result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => false do
130
+ result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load ? :stream : false do
130
131
  if Step === Step.log_relay_step and not self == Step.log_relay_step
131
132
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
132
133
  end
@@ -190,12 +191,18 @@ class Step
190
191
  end
191
192
 
192
193
  case result
193
- when IO, TSV::Dumper
194
- log :streaming, "#{Log.color :magenta, "Streaming task result"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
195
- class << result
196
- attr_accessor :callback
194
+ when IO, StringIO
195
+ log :streaming, "#{Log.color :magenta, "Streaming task result IO"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
196
+ ConcurrentStream.setup result do
197
+ eee 1
198
+ set_info :done, (done_time = Time.now)
199
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
200
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
197
201
  end
198
- result.callback = Proc.new do
202
+ when TSV::Dumper
203
+ log :streaming, "#{Log.color :magenta, "Streaming task result TSV::Dumper"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
204
+ ConcurrentStream.setup result.stream do
205
+ set_info :done, (done_time = Time.now)
199
206
  set_info :done, (done_time = Time.now)
200
207
  set_info :time_elapsed, (time_elapsed = done_time - start_time)
201
208
  log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
@@ -256,6 +263,7 @@ class Step
256
263
  exit -1
257
264
  end
258
265
  set_info :pid, nil
266
+ exit 0
259
267
  ensure
260
268
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
261
269
  end
@@ -297,7 +305,7 @@ class Step
297
305
  end
298
306
 
299
307
  def load
300
- return prepare_result @result, @task.result_description if @result
308
+ return prepare_result @result, @task.result_description if @result and not @path == @result
301
309
  join if not done?
302
310
  return Persist.load_file(@path, @task.result_type) if @path.exists?
303
311
  exec
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Monitor throughput
10
+
11
+ $ rbbt tsv throughput
12
+
13
+ Display summary information. Works with Tokyocabinet HDB and BDB as well.
14
+
15
+ -h--help Help
16
+ EOF
17
+
18
+ SOPT.usage if options[:help]
19
+
20
+ last = start = Time.now
21
+ count = 0
22
+ max = 0
23
+ avg = 0
24
+ all = []
25
+ scale = 5
26
+ while line = STDIN.gets
27
+ count += 1
28
+ if Time.now - last >= 1.0 / scale
29
+ Log.clear_line
30
+ puts "#{ count*scale } per second. Max #{max*scale}. Average #{avg*scale}"
31
+ last = Time.now
32
+ max = count > max ? count : max
33
+ all << count
34
+ avg = Misc.mean(all).to_i if all.length > 3
35
+ count = 0
36
+ end
37
+ end
38
+
39
+ all << count
40
+
41
+ puts "Total #{Misc.sum(all).to_i} in #{(Time.now - start).to_i} seconds -- #{(Misc.sum(all).to_f / (Time.now - start)).to_i } per second. Max #{max*scale}. Average #{avg*scale}"
@@ -53,20 +53,25 @@ options[:app].split(/,|\s/).collect do |app|
53
53
  app_dirs[app] = report
54
54
  end if options[:app]
55
55
 
56
- puts <<EOF
57
56
 
58
- #{ report_jobs workflow_dir}
59
- #{ app_dirs.collect{|d,report| report } * "\n" }
57
+ puts <<EOF
58
+ # LOCKS
59
+ #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read }
60
+ EOF
60
61
 
62
+ puts <<EOF
61
63
  # LOCKED TSV
62
64
  #{ Rbbt.var.tsv_open_locks.glob('*').collect{|f| "- " << File.basename(f) } * "\n" }
65
+ EOF
63
66
 
64
- # LOCKS
65
- #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.lock'").read }
66
-
67
+ puts <<EOF
67
68
  # PERSIST
68
69
  #{ CMD.cmd("find #{Rbbt.share.find_all.collect{|f| "'#{f}'" } * " " } -name '*.persist'").read << CMD.cmd("find #{Rbbt.var.find_all.collect{|f| "'#{f}'" } * " " } -name '*.persist'").read }
69
70
  EOF
70
71
 
71
-
72
+ puts <<EOF
73
+ # JOBS
74
+ #{ report_jobs workflow_dir}
75
+ #{ app_dirs.collect{|d,report| report } * "\n" }
76
+ EOF
72
77
 
@@ -404,10 +404,11 @@ when (defined?(WorkflowRESTClient) and WorkflowRESTClient::RemoteStep)
404
404
  when Step
405
405
  if IO === res.result
406
406
  io = res.result
407
- while line = io.gets do
408
- out.puts line
409
- end
410
- io.close
407
+ Thread.pass while IO.select([io]).nil?
408
+ while block = io.read(2048) do
409
+ out.write block
410
+ end unless io.closed?
411
+ io.join if io.respond_to? :join
411
412
  else
412
413
  res.join
413
414
  out.puts Open.read(res.path) if File.exists? res.path
@@ -12,7 +12,7 @@ end
12
12
 
13
13
  class TestPersist < Test::Unit::TestCase
14
14
 
15
- def test_array_persist
15
+ def _test_array_persist
16
16
  TmpFile.with_file do |tmp|
17
17
  10.times do
18
18
  assert_equal ["1", "2"],(Persist.persist("Test", :array, :file => tmp) do
@@ -50,7 +50,6 @@ class TestPersist < Test::Unit::TestCase
50
50
  dumper.add key, key + " - 2"
51
51
  end
52
52
  dumper.close
53
- Thread.exit
54
53
  end
55
54
  dumper
56
55
  end
@@ -48,15 +48,8 @@ class TestTSVParallelThrough < Test::Unit::TestCase
48
48
  def test_traverse_stream
49
49
  require 'rbbt/sources/organism'
50
50
 
51
- head = 100
51
+ head = 1000
52
52
 
53
- tsv = Organism.identifiers("Hsa").open
54
- res = {}
55
- TSV.traverse tsv, :head => head do |k,v|
56
- res[k] = v
57
- end
58
-
59
- assert_equal head, res.keys.compact.sort.length
60
53
  tsv = Organism.identifiers("Hsa").open
61
54
  res = {}
62
55
  TSV.traverse tsv, :head => head, :into => res do |k,v|
@@ -69,7 +62,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
69
62
  def test_traverse_stream_cpus
70
63
  require 'rbbt/sources/organism'
71
64
 
72
- head = 100
65
+ head = 1000
73
66
 
74
67
  tsv = Organism.identifiers("Hsa")
75
68
  res = {}
@@ -83,7 +76,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
83
76
  def test_traverse_stream_keys
84
77
  require 'rbbt/sources/organism'
85
78
 
86
- head = 100
79
+ head = 1000
87
80
 
88
81
  tsv = Organism.identifiers("Hsa").open
89
82
  res = []
@@ -92,7 +85,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
92
85
  res << v
93
86
  end
94
87
 
95
- assert_equal res, Organism.identifiers("Hsa").tsv(:head => 100).keys
88
+ assert_equal res, Organism.identifiers("Hsa").tsv(:head => head).keys
96
89
 
97
90
  tsv = Organism.identifiers("Hsa").open
98
91
  res = []
@@ -101,7 +94,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
101
94
  v
102
95
  end
103
96
 
104
- assert_equal res, Organism.identifiers("Hsa").tsv(:head => 100).keys
97
+ assert_equal res.sort, Organism.identifiers("Hsa").tsv(:head => head).keys.sort
105
98
  end
106
99
 
107
100
  def test_traverse_array
@@ -165,7 +158,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
165
158
  def test_traverse_benchmark
166
159
  require 'rbbt/sources/organism'
167
160
 
168
- head = 8_000
161
+ head = 2_000
169
162
 
170
163
  tsv = Organism.identifiers("Hsa").open
171
164
  Misc.benchmark do
@@ -182,6 +175,153 @@ class TestTSVParallelThrough < Test::Unit::TestCase
182
175
  [k,v]
183
176
  end
184
177
  end
178
+ end
179
+
180
+ def test_traverse_into_dumper
181
+ require 'rbbt/sources/organism'
182
+
183
+ head = 2_000
184
+
185
+ stream = Organism.identifiers("Hsa").open
186
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
187
+ dumper.init
188
+ TSV.traverse stream, :head => head, :into => dumper do |k,v|
189
+ k = k.first
190
+ [k,v]
191
+ end
192
+
193
+ res = TSV.open(dumper.stream)
194
+
195
+ assert_equal head, res.size
196
+ end
197
+
198
+ def test_traverse_into_dumper_threads
199
+ require 'rbbt/sources/organism'
200
+
201
+ head = 2_000
202
+ threads = 10
203
+
204
+ stream = Organism.identifiers("Hsa").open
205
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
206
+ dumper.init
207
+
208
+ TSV.traverse stream, :threads => threads, :head => head, :into => dumper do |k,v|
209
+ k = k.first
210
+ [k,v]
211
+ end
212
+
213
+ res = TSV.open(StringIO.new(dumper.stream.read))
214
+
215
+ assert_equal head, res.size
216
+ end
217
+
218
+ def test_traverse_into_dumper_cpus
219
+ require 'rbbt/sources/organism'
220
+
221
+ head = 2_000
222
+ cpus = 10
223
+
224
+ stream = Organism.identifiers("Hsa").open
225
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
226
+ dumper.init
227
+ TSV.traverse stream, :cpus => cpus, :head => head, :into => dumper do |k,v|
228
+ k = k.first
229
+ [k,v]
230
+ end
231
+
232
+ res = TSV.open(dumper.stream)
233
+
234
+ assert_equal head, res.size
235
+ end
236
+
237
+ #{{{ TRAVERSE DUMPER
238
+
239
+ def test_traverse_dumper
240
+ require 'rbbt/sources/organism'
185
241
 
242
+ head = 2_000
243
+
244
+ tsv = TSV::Parser.new Organism.identifiers("Hsa").open, :head => head
245
+ dumper = TSV::Dumper.new tsv.options
246
+
247
+ TSV.traverse tsv, :head => head, :into => dumper do |k,v|
248
+ k = k.first
249
+ [k,v]
250
+ end
251
+
252
+ res = {}
253
+ TSV.traverse dumper.stream, :into => res do |k,v|
254
+ [k, v.length]
255
+ end
256
+
257
+ assert_equal head, res.size
258
+ end
259
+
260
+ def test_traverse_dumper_threads
261
+ require 'rbbt/sources/organism'
262
+
263
+ head = 2_000
264
+ threads = 3
265
+
266
+ tsv = TSV::Parser.new Organism.identifiers("Hsa").open, :head => head
267
+ dumper = TSV::Dumper.new tsv.options
268
+
269
+ TSV.traverse tsv, :head => head, :threads => threads, :into => dumper do |k,v|
270
+ k = k.first
271
+ [k,v]
272
+ end
273
+
274
+ res = {}
275
+ TSV.traverse dumper.stream, :threads => threads, :into => res do |k,v|
276
+ [k, v.length]
277
+ end
278
+
279
+ assert_equal head, res.size
280
+ end
281
+
282
+ def test_traverse_dumper_cpus
283
+ require 'rbbt/sources/organism'
284
+
285
+ head = 10_000
286
+ cpus = nil
287
+
288
+ stream = Organism.identifiers("Hsa").open
289
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa").tsv_options
290
+
291
+ TSV.traverse stream, :head => head, :cpus => cpus, :into => dumper do |k,v|
292
+ k = k.first
293
+ [k,v]
294
+ end
295
+
296
+ res = {}
297
+ TSV.traverse dumper.stream, :cpus => cpus, :into => res do |k,v|
298
+ [k, v.length]
299
+ end
300
+
301
+ assert_equal head, res.size
302
+ end
303
+
304
+ def test_traverse_dumper_cpus_exception
305
+ require 'rbbt/sources/organism'
306
+
307
+ head = 2_000
308
+ cpus = 2
309
+
310
+ stream = Organism.identifiers("Hsa/jun2011").open
311
+ dumper = TSV::Dumper.new Organism.identifiers("Hsa/jun2011").tsv_options
312
+
313
+ assert_raise do
314
+ begin
315
+ TSV.traverse stream, :head => head, :cpus => cpus, :into => dumper do |k,v|
316
+ k = k.first
317
+ raise "STOP" if rand(100) < 1
318
+ [k,v]
319
+ end
320
+ dumper.stream.read
321
+ rescue Exception
322
+ Log.exception $!
323
+ raise $!
324
+ end
325
+ end
186
326
  end
187
327
  end
@@ -27,7 +27,6 @@ class TestConcurrencyProcess < Test::Unit::TestCase
27
27
  end
28
28
 
29
29
  q.join
30
- q.clean
31
30
 
32
31
  assert_equal times, res.length
33
32
  assert_equal [0, 2, 4], res.sort[0..2]
@@ -115,17 +115,154 @@ eum fugiat quo voluptas nulla pariatur?"
115
115
  assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
116
116
  end
117
117
 
118
- # def test_pdf2text_example
119
- # assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
120
- # end
121
- #
122
- # def test_pdf2text_EPAR
123
- # assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
124
- # end
125
- #
126
- # def test_pdf2text_wrong
127
- # assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
128
- # end
118
+ def test_pipe
119
+ sout, sin = Misc.pipe
120
+ assert_equal 1, Misc::OPEN_PIPE_IN.length
121
+ sin.close
122
+ assert sout.eof?
123
+ Misc.purge_pipes
124
+ assert_equal 0, Misc::OPEN_PIPE_IN.length
125
+ end
126
+
127
+ def test_pipe_fork
128
+ sout, sin = Misc.pipe
129
+ pid = Process.fork do
130
+ Misc.purge_pipes(sin)
131
+ sleep 2
132
+ sin.close
133
+ end
134
+ sin.close
135
+ assert sout.eof?
136
+ Process.kill :INT, pid
137
+ end
138
+
139
+ def test_open_pipe
140
+ t = 5
141
+ stream = Misc.open_pipe do |sin|
142
+ t.times do |i|
143
+ sleep 0.5
144
+ sin.puts "LINE #{ i }"
145
+ end
146
+ end
147
+
148
+ time = Time.now
149
+ lines = []
150
+ while line = stream.gets
151
+ lines << line.strip
152
+ end
153
+ time_spent = Time.new - time
154
+
155
+ assert time_spent >= t * 0.5
156
+ assert time_spent <= (t+1) * 0.5
157
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
158
+ end
159
+
160
+ def test_open_pipe_fork
161
+ t = 5
162
+ stream = Misc.open_pipe(true) do |sin|
163
+ t.times do |i|
164
+ sleep 0.5
165
+ sin.puts "LINE #{ i }"
166
+ end
167
+ end
168
+
169
+ time = Time.now
170
+ lines = []
171
+ while line = stream.gets
172
+ lines << line.strip
173
+ end
174
+ time_spent = Time.new - time
175
+
176
+ assert time_spent >= t * 0.5
177
+ assert time_spent <= (t+1) * 0.5
178
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
179
+ end
180
+
181
+ def test_open_pipe_fork_cascade
182
+ t = 500
183
+ sleep_time = 2.0 / t
184
+ time = Time.now
185
+
186
+ stream1 = Misc.open_pipe(true) do |sin|
187
+ t.times do |i|
188
+ sleep sleep_time
189
+ sin.puts "LINE #{ i }"
190
+ end
191
+ end
192
+
193
+ stream2 = Misc.open_pipe(true) do |sin|
194
+ while line = stream1.gets
195
+ sin.puts line.strip.reverse
196
+ end
197
+ end
198
+
199
+ stream3 = Misc.open_pipe(true) do |sin|
200
+ while line = stream2.gets
201
+ sin.puts line.downcase
202
+ end
203
+ end
204
+
205
+ lines = []
206
+ while line = stream3.gets
207
+ lines << line.strip
208
+ end
209
+
210
+ time_spent = Time.new - time
211
+
212
+ assert time_spent >= t * sleep_time
213
+ assert time_spent <= t * 1.2 * sleep_time
214
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".reverse.downcase}, lines
215
+ end
216
+
217
+ def test_tee_stream
218
+ t = 500
219
+ sleep_time = 2.0 / t
220
+ time = Time.now
221
+
222
+ stream1 = Misc.open_pipe(true) do |sin|
223
+ t.times do |i|
224
+ sleep sleep_time
225
+ sin.puts "LINE #{ i }"
226
+ end
227
+ end
228
+
229
+ stream2, stream3 = Misc.tee_stream stream1
230
+
231
+ stream4 = Misc.open_pipe(true) do |sin|
232
+ while line = stream2.gets
233
+ sin.puts line.strip.reverse
234
+ end
235
+ end
236
+
237
+ stream5 = Misc.open_pipe(true) do |sin|
238
+ while line = stream3.gets
239
+ sin.puts line.strip.downcase
240
+ end
241
+ end
242
+
243
+ lines1 = []
244
+ th1 = Thread.new do
245
+ while line = stream4.gets
246
+ lines1 << line.strip
247
+ end
248
+ end
249
+
250
+ lines2 = []
251
+ th2 = Thread.new do
252
+ while line = stream5.gets
253
+ lines2 << line.strip
254
+ end
255
+ end
256
+ th1.join and th2.join
257
+
258
+ time_spent = Time.new - time
259
+
260
+ assert time_spent >= t * sleep_time
261
+ assert time_spent <= t * 1.2 * sleep_time
262
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".reverse}, lines1
263
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }".downcase}, lines2
264
+ end
265
+
129
266
 
130
267
  def test_string2hash
131
268
  assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
@@ -294,26 +431,6 @@ eum fugiat quo voluptas nulla pariatur?"
294
431
  assert_equal "COSMIC", Misc.camel_case("COSMIC")
295
432
  end
296
433
 
297
- def test_pipe
298
- t = 5
299
- stream = Misc.open_pipe do |sin|
300
- t.times do |i|
301
- sleep 0.5
302
- sin.puts "LINE #{ i }"
303
- end
304
- end
305
-
306
- time = Time.now
307
- lines = []
308
- while line = stream.gets
309
- lines << line.strip
310
- end
311
- time_spent = Time.new - time
312
-
313
- assert time_spent >= t * 0.5
314
- assert time_spent <= (t+1) * 0.5
315
- assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
316
- end
317
434
 
318
435
  def __test_lock_fd
319
436
  require 'rbbt/workflow'