rbbt-util 5.27.6 → 5.27.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/stream.rb +1 -0
- data/lib/rbbt/util/misc/pipes.rb +37 -7
- data/lib/rbbt/workflow/step/dependencies.rb +1 -1
- data/test/rbbt/tsv/test_stream.rb +4 -3
- data/test/rbbt/util/misc/test_pipes.rb +1 -1
- data/test/rbbt/workflow/step/test_dependencies.rb +68 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6551d632ae1f2289805a53ceaf9db01d0c0a029ea6c76176aed419e4d6a2a485
|
4
|
+
data.tar.gz: 72562729535554e718451adb87338503ff40a81874a9e0fed6f8eaf4e6aa0edc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d8c3ff2861dd4bbd835e3e41826f5fd6452bdd0b40fbd8378737effdffa6da051ce221e24f4bbacaf952725ccf7da09b533c8c414a7fd10480369ae528a4259
|
7
|
+
data.tar.gz: 1790b8f38f91dfbb582a394d86953156d90351a8d724fb9535a3bc72ebac5fbbfe24e88fe10cf943e2ed91ff059af68cf581bac333f5480096f85cf6e29b8018
|
data/lib/rbbt/tsv/stream.rb
CHANGED
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -18,6 +18,8 @@ module Misc
|
|
18
18
|
|
19
19
|
BLOCK_SIZE=1024 * 8
|
20
20
|
|
21
|
+
SKIP_TAG="[SKIP TAG]"
|
22
|
+
|
21
23
|
PIPE_MUTEX = Mutex.new
|
22
24
|
|
23
25
|
OPEN_PIPE_IN = []
|
@@ -29,7 +31,7 @@ module Misc
|
|
29
31
|
|
30
32
|
[sout, sin]
|
31
33
|
end
|
32
|
-
Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
|
34
|
+
Log.debug{"Creating pipe #{[res.last.inspect, res.first.inspect] * " => "}"}
|
33
35
|
res
|
34
36
|
end
|
35
37
|
|
@@ -255,6 +257,11 @@ module Misc
|
|
255
257
|
end
|
256
258
|
tee1, *rest = Misc.tee_stream stream_dup, num + 1
|
257
259
|
stream.reopen(tee1)
|
260
|
+
|
261
|
+
#ToDo: I can't explain why the @threads variable appears with the value of
|
262
|
+
# @filename
|
263
|
+
stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)
|
264
|
+
|
258
265
|
tee1.annotate(stream)
|
259
266
|
rest
|
260
267
|
end
|
@@ -537,18 +544,29 @@ module Misc
|
|
537
544
|
end
|
538
545
|
end
|
539
546
|
|
547
|
+
def self.buffer_stream(stream)
|
548
|
+
sout, sin = Misc.pipe
|
549
|
+
Misc.consume_stream(stream, true, sin)
|
550
|
+
sout
|
551
|
+
end
|
552
|
+
|
540
553
|
def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
|
541
554
|
output.puts header if header
|
542
555
|
streams = streams.collect do |stream|
|
543
556
|
if defined? Step and Step === stream
|
544
|
-
stream.get_stream
|
557
|
+
io = stream.get_stream
|
558
|
+
if io
|
559
|
+
buffer_stream(io)
|
560
|
+
else
|
561
|
+
stream.join.path.open
|
562
|
+
end
|
545
563
|
else
|
546
564
|
stream
|
547
565
|
end
|
548
566
|
end
|
549
567
|
|
550
568
|
begin
|
551
|
-
|
569
|
+
|
552
570
|
lines ||= streams.collect{|s| s.gets }
|
553
571
|
keys = []
|
554
572
|
parts = []
|
@@ -564,6 +582,7 @@ module Misc
|
|
564
582
|
end
|
565
583
|
sizes = parts.collect{|p| p.nil? ? 0 : p.length }
|
566
584
|
last_min = nil
|
585
|
+
|
567
586
|
while lines.compact.any?
|
568
587
|
if block_given?
|
569
588
|
min = keys.compact.sort(&block).first
|
@@ -571,14 +590,23 @@ module Misc
|
|
571
590
|
min = keys.compact.sort.first
|
572
591
|
end
|
573
592
|
str = []
|
593
|
+
threads = []
|
574
594
|
keys.each_with_index do |key,i|
|
575
595
|
case key
|
576
596
|
when min
|
577
|
-
|
597
|
+
if parts[i] == [SKIP_TAG]
|
598
|
+
str << [sep * (sizes[i]-1)] if sizes[i] > 0
|
599
|
+
else
|
600
|
+
str << [parts[i] * sep]
|
601
|
+
end
|
602
|
+
|
578
603
|
line = lines[i] = streams[i].gets
|
579
|
-
|
604
|
+
|
605
|
+
if line.nil?
|
580
606
|
keys[i] = nil
|
581
607
|
parts[i] = nil
|
608
|
+
streams[i].close unless streams[i].closed?
|
609
|
+
streams[i].join if streams[i].respond_to?(:join)
|
582
610
|
else
|
583
611
|
k, *p = line.chomp.split(sep, -1)
|
584
612
|
keys[i] = k
|
@@ -589,10 +617,12 @@ module Misc
|
|
589
617
|
end
|
590
618
|
end
|
591
619
|
|
592
|
-
output.puts [min, str*sep] * sep
|
620
|
+
output.puts [min, str.flatten*sep] * sep
|
593
621
|
end
|
622
|
+
|
594
623
|
streams.each do |stream|
|
595
|
-
stream.
|
624
|
+
stream.close unless stream.closed?
|
625
|
+
stream.join if stream.respond_to?(:join)
|
596
626
|
end
|
597
627
|
rescue
|
598
628
|
Log.exception $!
|
@@ -366,7 +366,7 @@ class Step
|
|
366
366
|
(step.dependencies + step.input_dependencies).each do |step_dep|
|
367
367
|
next if step_dep.done? or step_dep.running? or (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
|
368
368
|
dep_step[step_dep.path] ||= []
|
369
|
-
dep_step[step_dep.path] <<
|
369
|
+
dep_step[step_dep.path] << step
|
370
370
|
end
|
371
371
|
end
|
372
372
|
|
@@ -4,6 +4,7 @@ require 'rbbt/tsv/stream'
|
|
4
4
|
require 'rbbt'
|
5
5
|
|
6
6
|
class TestStream < Test::Unit::TestCase
|
7
|
+
|
7
8
|
def test_collapse_stream
|
8
9
|
text=<<-EOF
|
9
10
|
#: :sep=" "
|
@@ -65,7 +66,7 @@ row3 AAA BBB CCC
|
|
65
66
|
|
66
67
|
text2=<<-EOF
|
67
68
|
#: :sep=" "
|
68
|
-
#Row Labela Labelb
|
69
|
+
#Row Labela Labelb
|
69
70
|
row1 a b
|
70
71
|
row3 aaa bbb
|
71
72
|
row2 aa bb
|
@@ -100,7 +101,7 @@ row1 A B C
|
|
100
101
|
|
101
102
|
text2=<<-EOF
|
102
103
|
#: :sep=" "
|
103
|
-
#Row Labela Labelb
|
104
|
+
#Row Labela Labelb
|
104
105
|
row2 aa bb
|
105
106
|
EOF
|
106
107
|
|
@@ -132,7 +133,7 @@ row1 A B C
|
|
132
133
|
|
133
134
|
text2=<<-EOF
|
134
135
|
#: :sep=" "
|
135
|
-
#Row Labela Labelb
|
136
|
+
#Row Labela Labelb
|
136
137
|
row2 aa bb
|
137
138
|
EOF
|
138
139
|
|
@@ -4,6 +4,27 @@ require 'rbbt/workflow'
|
|
4
4
|
module DepWorkflow
|
5
5
|
extend Workflow
|
6
6
|
|
7
|
+
input :input_file, :file, "Input file", nil, :stream => true
|
8
|
+
task :s1 => :array do |input_file|
|
9
|
+
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
10
|
+
line + "\t" << "Task1"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
dep :s1
|
15
|
+
task :s2 => :array do |input_file|
|
16
|
+
TSV.traverse step(:s1), :type => :array, :into => :stream, :bar => "Task2" do |line|
|
17
|
+
next [line.split("\t").first, Misc::SKIP_TAG] * "\t" if rand < 0.9
|
18
|
+
line + "\t" << "Task2"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
dep :s1
|
23
|
+
dep :s2
|
24
|
+
task :s3 => :array do |input_file|
|
25
|
+
Misc.paste_streams(dependencies.reverse)
|
26
|
+
end
|
27
|
+
|
7
28
|
input :input_file, :file, "Input file", nil, :stream => true
|
8
29
|
task :task1 => :array do |input_file|
|
9
30
|
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
@@ -28,9 +49,7 @@ module DepWorkflow
|
|
28
49
|
dep :task2
|
29
50
|
dep :task3
|
30
51
|
task :task4 => :array do
|
31
|
-
|
32
|
-
s2 = TSV.get_stream step(:task3)
|
33
|
-
Misc.paste_streams([s1, s2])
|
52
|
+
Misc.paste_streams(dependencies)
|
34
53
|
end
|
35
54
|
|
36
55
|
dep :task4
|
@@ -40,12 +59,10 @@ module DepWorkflow
|
|
40
59
|
end
|
41
60
|
end
|
42
61
|
|
43
|
-
dep :task5
|
44
62
|
dep :task2
|
63
|
+
dep :task5
|
45
64
|
task :task6 => :array do
|
46
|
-
|
47
|
-
s2 = TSV.get_stream step(:task5)
|
48
|
-
Misc.paste_streams([s1, s2])
|
65
|
+
Misc.paste_streams(dependencies)
|
49
66
|
end
|
50
67
|
|
51
68
|
input :stream_file, :file, "Streamed file", nil, :stream => true
|
@@ -134,8 +151,25 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
134
151
|
end
|
135
152
|
end
|
136
153
|
|
154
|
+
def test_task3
|
155
|
+
size = 100000
|
156
|
+
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
157
|
+
TmpFile.with_file(content) do |input_file|
|
158
|
+
job = DepWorkflow.job(:task3, "TEST", :input_file => input_file)
|
159
|
+
io = TSV.get_stream job.run(:stream)
|
160
|
+
last_line = nil
|
161
|
+
while line = io.gets
|
162
|
+
last_line = line.strip
|
163
|
+
end
|
164
|
+
io.join
|
165
|
+
|
166
|
+
assert_equal "Line #{size}\tTask1\tTask3", last_line
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
137
170
|
def test_task4
|
138
|
-
size =
|
171
|
+
size = 100000
|
172
|
+
Log.severity = 0
|
139
173
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
140
174
|
last_line = nil
|
141
175
|
TmpFile.with_file(content) do |input_file|
|
@@ -151,7 +185,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
151
185
|
end
|
152
186
|
|
153
187
|
def test_task5
|
154
|
-
size =
|
188
|
+
size = 10000
|
155
189
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
156
190
|
last_line = nil
|
157
191
|
TmpFile.with_file(content) do |input_file|
|
@@ -165,9 +199,32 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
165
199
|
assert_equal "Line #{size}\tTask1\tTask2\tTask1\tTask3\tTask5", last_line
|
166
200
|
end
|
167
201
|
|
202
|
+
def test_s3
|
203
|
+
size = 100000
|
204
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
205
|
+
last_line = nil
|
206
|
+
Log.severity = 0
|
207
|
+
TmpFile.with_file(content) do |input_file|
|
208
|
+
begin
|
209
|
+
job = DepWorkflow.job(:s3, "TEST", :input_file => input_file)
|
210
|
+
job.recursive_clean
|
211
|
+
job.run(:stream)
|
212
|
+
io = TSV.get_stream job
|
213
|
+
while line = io.gets
|
214
|
+
last_line = line.strip
|
215
|
+
end
|
216
|
+
io.join if io.respond_to? :join
|
217
|
+
rescue Exception
|
218
|
+
job.abort
|
219
|
+
raise $!
|
220
|
+
end
|
221
|
+
end
|
222
|
+
assert last_line.include? "Line #{size}"
|
223
|
+
end
|
224
|
+
|
168
225
|
def test_task6
|
169
226
|
size = 100000
|
170
|
-
content = (
|
227
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
171
228
|
last_line = nil
|
172
229
|
Log.severity = 0
|
173
230
|
TmpFile.with_file(content) do |input_file|
|
@@ -189,7 +246,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
189
246
|
end
|
190
247
|
|
191
248
|
def test_task8
|
192
|
-
size =
|
249
|
+
size = 10000
|
193
250
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
194
251
|
last_line = nil
|
195
252
|
Log.severity = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.27.6
|
4
|
+
version: 5.27.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|