rbbt-util 5.27.6 → 5.27.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/stream.rb +1 -0
- data/lib/rbbt/util/misc/pipes.rb +37 -7
- data/lib/rbbt/workflow/step/dependencies.rb +1 -1
- data/test/rbbt/tsv/test_stream.rb +4 -3
- data/test/rbbt/util/misc/test_pipes.rb +1 -1
- data/test/rbbt/workflow/step/test_dependencies.rb +68 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6551d632ae1f2289805a53ceaf9db01d0c0a029ea6c76176aed419e4d6a2a485
|
4
|
+
data.tar.gz: 72562729535554e718451adb87338503ff40a81874a9e0fed6f8eaf4e6aa0edc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d8c3ff2861dd4bbd835e3e41826f5fd6452bdd0b40fbd8378737effdffa6da051ce221e24f4bbacaf952725ccf7da09b533c8c414a7fd10480369ae528a4259
|
7
|
+
data.tar.gz: 1790b8f38f91dfbb582a394d86953156d90351a8d724fb9535a3bc72ebac5fbbfe24e88fe10cf943e2ed91ff059af68cf581bac333f5480096f85cf6e29b8018
|
data/lib/rbbt/tsv/stream.rb
CHANGED
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -18,6 +18,8 @@ module Misc
|
|
18
18
|
|
19
19
|
BLOCK_SIZE=1024 * 8
|
20
20
|
|
21
|
+
SKIP_TAG="[SKIP TAG]"
|
22
|
+
|
21
23
|
PIPE_MUTEX = Mutex.new
|
22
24
|
|
23
25
|
OPEN_PIPE_IN = []
|
@@ -29,7 +31,7 @@ module Misc
|
|
29
31
|
|
30
32
|
[sout, sin]
|
31
33
|
end
|
32
|
-
Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
|
34
|
+
Log.debug{"Creating pipe #{[res.last.inspect, res.first.inspect] * " => "}"}
|
33
35
|
res
|
34
36
|
end
|
35
37
|
|
@@ -255,6 +257,11 @@ module Misc
|
|
255
257
|
end
|
256
258
|
tee1, *rest = Misc.tee_stream stream_dup, num + 1
|
257
259
|
stream.reopen(tee1)
|
260
|
+
|
261
|
+
#ToDo: I can't explain why the @threads variable appears with the value of
|
262
|
+
# @filename
|
263
|
+
stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)
|
264
|
+
|
258
265
|
tee1.annotate(stream)
|
259
266
|
rest
|
260
267
|
end
|
@@ -537,18 +544,29 @@ module Misc
|
|
537
544
|
end
|
538
545
|
end
|
539
546
|
|
547
|
+
def self.buffer_stream(stream)
|
548
|
+
sout, sin = Misc.pipe
|
549
|
+
Misc.consume_stream(stream, true, sin)
|
550
|
+
sout
|
551
|
+
end
|
552
|
+
|
540
553
|
def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
|
541
554
|
output.puts header if header
|
542
555
|
streams = streams.collect do |stream|
|
543
556
|
if defined? Step and Step === stream
|
544
|
-
stream.get_stream
|
557
|
+
io = stream.get_stream
|
558
|
+
if io
|
559
|
+
buffer_stream(io)
|
560
|
+
else
|
561
|
+
stream.join.path.open
|
562
|
+
end
|
545
563
|
else
|
546
564
|
stream
|
547
565
|
end
|
548
566
|
end
|
549
567
|
|
550
568
|
begin
|
551
|
-
|
569
|
+
|
552
570
|
lines ||= streams.collect{|s| s.gets }
|
553
571
|
keys = []
|
554
572
|
parts = []
|
@@ -564,6 +582,7 @@ module Misc
|
|
564
582
|
end
|
565
583
|
sizes = parts.collect{|p| p.nil? ? 0 : p.length }
|
566
584
|
last_min = nil
|
585
|
+
|
567
586
|
while lines.compact.any?
|
568
587
|
if block_given?
|
569
588
|
min = keys.compact.sort(&block).first
|
@@ -571,14 +590,23 @@ module Misc
|
|
571
590
|
min = keys.compact.sort.first
|
572
591
|
end
|
573
592
|
str = []
|
593
|
+
threads = []
|
574
594
|
keys.each_with_index do |key,i|
|
575
595
|
case key
|
576
596
|
when min
|
577
|
-
|
597
|
+
if parts[i] == [SKIP_TAG]
|
598
|
+
str << [sep * (sizes[i]-1)] if sizes[i] > 0
|
599
|
+
else
|
600
|
+
str << [parts[i] * sep]
|
601
|
+
end
|
602
|
+
|
578
603
|
line = lines[i] = streams[i].gets
|
579
|
-
|
604
|
+
|
605
|
+
if line.nil?
|
580
606
|
keys[i] = nil
|
581
607
|
parts[i] = nil
|
608
|
+
streams[i].close unless streams[i].closed?
|
609
|
+
streams[i].join if streams[i].respond_to?(:join)
|
582
610
|
else
|
583
611
|
k, *p = line.chomp.split(sep, -1)
|
584
612
|
keys[i] = k
|
@@ -589,10 +617,12 @@ module Misc
|
|
589
617
|
end
|
590
618
|
end
|
591
619
|
|
592
|
-
output.puts [min, str*sep] * sep
|
620
|
+
output.puts [min, str.flatten*sep] * sep
|
593
621
|
end
|
622
|
+
|
594
623
|
streams.each do |stream|
|
595
|
-
stream.
|
624
|
+
stream.close unless stream.closed?
|
625
|
+
stream.join if stream.respond_to?(:join)
|
596
626
|
end
|
597
627
|
rescue
|
598
628
|
Log.exception $!
|
@@ -366,7 +366,7 @@ class Step
|
|
366
366
|
(step.dependencies + step.input_dependencies).each do |step_dep|
|
367
367
|
next if step_dep.done? or step_dep.running? or (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
|
368
368
|
dep_step[step_dep.path] ||= []
|
369
|
-
dep_step[step_dep.path] <<
|
369
|
+
dep_step[step_dep.path] << step
|
370
370
|
end
|
371
371
|
end
|
372
372
|
|
@@ -4,6 +4,7 @@ require 'rbbt/tsv/stream'
|
|
4
4
|
require 'rbbt'
|
5
5
|
|
6
6
|
class TestStream < Test::Unit::TestCase
|
7
|
+
|
7
8
|
def test_collapse_stream
|
8
9
|
text=<<-EOF
|
9
10
|
#: :sep=" "
|
@@ -65,7 +66,7 @@ row3 AAA BBB CCC
|
|
65
66
|
|
66
67
|
text2=<<-EOF
|
67
68
|
#: :sep=" "
|
68
|
-
#Row Labela Labelb
|
69
|
+
#Row Labela Labelb
|
69
70
|
row1 a b
|
70
71
|
row3 aaa bbb
|
71
72
|
row2 aa bb
|
@@ -100,7 +101,7 @@ row1 A B C
|
|
100
101
|
|
101
102
|
text2=<<-EOF
|
102
103
|
#: :sep=" "
|
103
|
-
#Row Labela Labelb
|
104
|
+
#Row Labela Labelb
|
104
105
|
row2 aa bb
|
105
106
|
EOF
|
106
107
|
|
@@ -132,7 +133,7 @@ row1 A B C
|
|
132
133
|
|
133
134
|
text2=<<-EOF
|
134
135
|
#: :sep=" "
|
135
|
-
#Row Labela Labelb
|
136
|
+
#Row Labela Labelb
|
136
137
|
row2 aa bb
|
137
138
|
EOF
|
138
139
|
|
@@ -4,6 +4,27 @@ require 'rbbt/workflow'
|
|
4
4
|
module DepWorkflow
|
5
5
|
extend Workflow
|
6
6
|
|
7
|
+
input :input_file, :file, "Input file", nil, :stream => true
|
8
|
+
task :s1 => :array do |input_file|
|
9
|
+
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
10
|
+
line + "\t" << "Task1"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
dep :s1
|
15
|
+
task :s2 => :array do |input_file|
|
16
|
+
TSV.traverse step(:s1), :type => :array, :into => :stream, :bar => "Task2" do |line|
|
17
|
+
next [line.split("\t").first, Misc::SKIP_TAG] * "\t" if rand < 0.9
|
18
|
+
line + "\t" << "Task2"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
dep :s1
|
23
|
+
dep :s2
|
24
|
+
task :s3 => :array do |input_file|
|
25
|
+
Misc.paste_streams(dependencies.reverse)
|
26
|
+
end
|
27
|
+
|
7
28
|
input :input_file, :file, "Input file", nil, :stream => true
|
8
29
|
task :task1 => :array do |input_file|
|
9
30
|
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
@@ -28,9 +49,7 @@ module DepWorkflow
|
|
28
49
|
dep :task2
|
29
50
|
dep :task3
|
30
51
|
task :task4 => :array do
|
31
|
-
|
32
|
-
s2 = TSV.get_stream step(:task3)
|
33
|
-
Misc.paste_streams([s1, s2])
|
52
|
+
Misc.paste_streams(dependencies)
|
34
53
|
end
|
35
54
|
|
36
55
|
dep :task4
|
@@ -40,12 +59,10 @@ module DepWorkflow
|
|
40
59
|
end
|
41
60
|
end
|
42
61
|
|
43
|
-
dep :task5
|
44
62
|
dep :task2
|
63
|
+
dep :task5
|
45
64
|
task :task6 => :array do
|
46
|
-
|
47
|
-
s2 = TSV.get_stream step(:task5)
|
48
|
-
Misc.paste_streams([s1, s2])
|
65
|
+
Misc.paste_streams(dependencies)
|
49
66
|
end
|
50
67
|
|
51
68
|
input :stream_file, :file, "Streamed file", nil, :stream => true
|
@@ -134,8 +151,25 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
134
151
|
end
|
135
152
|
end
|
136
153
|
|
154
|
+
def test_task3
|
155
|
+
size = 100000
|
156
|
+
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
157
|
+
TmpFile.with_file(content) do |input_file|
|
158
|
+
job = DepWorkflow.job(:task3, "TEST", :input_file => input_file)
|
159
|
+
io = TSV.get_stream job.run(:stream)
|
160
|
+
last_line = nil
|
161
|
+
while line = io.gets
|
162
|
+
last_line = line.strip
|
163
|
+
end
|
164
|
+
io.join
|
165
|
+
|
166
|
+
assert_equal "Line #{size}\tTask1\tTask3", last_line
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
137
170
|
def test_task4
|
138
|
-
size =
|
171
|
+
size = 100000
|
172
|
+
Log.severity = 0
|
139
173
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
140
174
|
last_line = nil
|
141
175
|
TmpFile.with_file(content) do |input_file|
|
@@ -151,7 +185,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
151
185
|
end
|
152
186
|
|
153
187
|
def test_task5
|
154
|
-
size =
|
188
|
+
size = 10000
|
155
189
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
156
190
|
last_line = nil
|
157
191
|
TmpFile.with_file(content) do |input_file|
|
@@ -165,9 +199,32 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
165
199
|
assert_equal "Line #{size}\tTask1\tTask2\tTask1\tTask3\tTask5", last_line
|
166
200
|
end
|
167
201
|
|
202
|
+
def test_s3
|
203
|
+
size = 100000
|
204
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
205
|
+
last_line = nil
|
206
|
+
Log.severity = 0
|
207
|
+
TmpFile.with_file(content) do |input_file|
|
208
|
+
begin
|
209
|
+
job = DepWorkflow.job(:s3, "TEST", :input_file => input_file)
|
210
|
+
job.recursive_clean
|
211
|
+
job.run(:stream)
|
212
|
+
io = TSV.get_stream job
|
213
|
+
while line = io.gets
|
214
|
+
last_line = line.strip
|
215
|
+
end
|
216
|
+
io.join if io.respond_to? :join
|
217
|
+
rescue Exception
|
218
|
+
job.abort
|
219
|
+
raise $!
|
220
|
+
end
|
221
|
+
end
|
222
|
+
assert last_line.include? "Line #{size}"
|
223
|
+
end
|
224
|
+
|
168
225
|
def test_task6
|
169
226
|
size = 100000
|
170
|
-
content = (
|
227
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
171
228
|
last_line = nil
|
172
229
|
Log.severity = 0
|
173
230
|
TmpFile.with_file(content) do |input_file|
|
@@ -189,7 +246,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
189
246
|
end
|
190
247
|
|
191
248
|
def test_task8
|
192
|
-
size =
|
249
|
+
size = 10000
|
193
250
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
194
251
|
last_line = nil
|
195
252
|
Log.severity = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.27.
|
4
|
+
version: 5.27.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|