rbbt-util 5.13.37 → 5.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -162,8 +162,7 @@ module TSV
|
|
162
162
|
desc = @monitor[:desc] if @monitor.include? :desc
|
163
163
|
step = @monitor[:step] if @monitor.include? :step
|
164
164
|
end
|
165
|
-
|
166
|
-
progress_monitor = Log::ProgressBar.new(size, :desc => desc)
|
165
|
+
progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc)
|
167
166
|
else
|
168
167
|
progress_monitor = nil
|
169
168
|
end
|
@@ -192,10 +191,10 @@ module TSV
|
|
192
191
|
when :flat, :single
|
193
192
|
prepare_entity(value, traverser.new_field_names.first, entity_options)
|
194
193
|
end
|
195
|
-
|
196
194
|
end
|
197
195
|
|
198
196
|
|
197
|
+
|
199
198
|
if zipped
|
200
199
|
|
201
200
|
keys.each_with_index do |k,i|
|
@@ -224,8 +223,11 @@ module TSV
|
|
224
223
|
end
|
225
224
|
|
226
225
|
end
|
226
|
+
|
227
227
|
end
|
228
228
|
|
229
|
+
Log::ProgressBar.remove_bar progress_monitor if progress_monitor
|
230
|
+
|
229
231
|
[traverser.new_key_field_name, traverser.new_field_names]
|
230
232
|
end
|
231
233
|
|
@@ -415,7 +417,7 @@ module TSV
|
|
415
417
|
case
|
416
418
|
when (Array === method and (key == :key or key_field == key))
|
417
419
|
with_unnamed do
|
418
|
-
Annotated.purge(method).uniq
|
420
|
+
TSV.traverse(Annotated.purge(method).uniq, :bar => true){|key|
|
419
421
|
new[key] = self[key] if invert ^ (self.include? key)
|
420
422
|
}
|
421
423
|
end
|
data/lib/rbbt/tsv/parallel/traverse.rb
CHANGED
@@ -38,7 +38,6 @@ module TSV
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
rescue Exception
|
41
|
-
Log.exception $!
|
42
41
|
nil
|
43
42
|
end
|
44
43
|
end
|
@@ -186,8 +185,9 @@ module TSV
|
|
186
185
|
end
|
187
186
|
else
|
188
187
|
options[:monitor] = bar
|
189
|
-
TSV::Parser.traverse(io, options, &block)
|
188
|
+
TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
|
190
189
|
end
|
190
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
191
191
|
join.call if join
|
192
192
|
end
|
193
193
|
|
@@ -213,7 +213,7 @@ module TSV
|
|
213
213
|
else
|
214
214
|
obj.traverse(options, &block)
|
215
215
|
end
|
216
|
-
when IO, File
|
216
|
+
when IO, File, StringIO
|
217
217
|
begin
|
218
218
|
if options[:type] == :array
|
219
219
|
traverse_io_array(obj, options, &block)
|
@@ -392,10 +392,11 @@ module TSV
|
|
392
392
|
end
|
393
393
|
true
|
394
394
|
rescue Aborted, Interrupt
|
395
|
-
Log.medium "Aborted storing into #{Misc.fingerprint store}
|
395
|
+
Log.medium "Aborted storing into #{Misc.fingerprint store}"
|
396
396
|
stream = obj_stream(store)
|
397
397
|
stream.abort if stream.respond_to? :abort
|
398
398
|
rescue Exception
|
399
|
+
Log.medium "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
|
399
400
|
stream = obj_stream(store)
|
400
401
|
stream.abort if stream.respond_to? :abort
|
401
402
|
raise $!
|
@@ -462,7 +463,7 @@ module TSV
|
|
462
463
|
thread = Thread.new(Thread.current) do |parent|
|
463
464
|
begin
|
464
465
|
traverse_run(obj, threads, cpus, options, &block)
|
465
|
-
into.close if into.respond_to? :
|
466
|
+
into.close if into.respond_to?(:close) and not (into.respond_to? :closed? and into.closed?)
|
466
467
|
rescue Exception
|
467
468
|
stream = obj_stream(obj)
|
468
469
|
stream.abort if stream and stream.respond_to? :abort
|
@@ -557,7 +558,7 @@ module TSV
|
|
557
558
|
traverse_stream(obj, threads, cpus, options, &block)
|
558
559
|
else
|
559
560
|
traverse_run(obj, threads, cpus, options, &block)
|
560
|
-
into.close if into.respond_to? :
|
561
|
+
into.close if into.respond_to?(:close) and not (into.respond_to? :closed and into.closed?)
|
561
562
|
end
|
562
563
|
|
563
564
|
into
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
module TSV
|
3
3
|
class Parser
|
4
|
-
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
|
4
|
+
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
|
5
5
|
|
6
6
|
class SKIP_LINE < Exception; end
|
7
7
|
class END_PARSING < Exception; end
|
@@ -13,20 +13,22 @@ module TSV
|
|
13
13
|
|
14
14
|
def parse_header(stream)
|
15
15
|
options = {}
|
16
|
+
@preamble = []
|
16
17
|
|
17
18
|
# Get line
|
18
19
|
|
19
20
|
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
20
21
|
line = stream.gets
|
21
|
-
|
22
|
-
|
23
|
-
line.chomp
|
22
|
+
return {} if line.nil?
|
23
|
+
#raise "Empty content: #{ stream.inspect }" if line.nil?
|
24
|
+
line = Misc.fixutf8 line.chomp
|
24
25
|
|
25
26
|
# Process options line
|
26
27
|
|
27
28
|
if line and line =~ /^#{@header_hash}: (.*)/
|
28
|
-
options = Misc.string2hash $1.
|
29
|
-
line =
|
29
|
+
options = Misc.string2hash $1.chomp
|
30
|
+
line = stream.gets
|
31
|
+
line = Misc.fixutf8 line.chomp if line
|
30
32
|
end
|
31
33
|
|
32
34
|
# Determine separator
|
@@ -35,16 +37,20 @@ module TSV
|
|
35
37
|
|
36
38
|
# Process fields line
|
37
39
|
|
40
|
+
preamble << line if line
|
38
41
|
while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
|
39
|
-
line.chomp!
|
40
42
|
@fields = line.split(@sep)
|
41
43
|
@key_field = @fields.shift
|
42
44
|
@key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
|
43
45
|
|
44
46
|
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
45
|
-
line = @header_hash != "" ?
|
47
|
+
line = (@header_hash != "" ? stream.gets : nil)
|
48
|
+
line = Misc.fixutf8 line.chomp if line
|
49
|
+
preamble << line if line
|
46
50
|
end
|
47
51
|
|
52
|
+
@preamble = preamble[0..-3] * "\n"
|
53
|
+
|
48
54
|
line ||= stream.gets
|
49
55
|
|
50
56
|
@first_line = line
|
@@ -112,7 +118,7 @@ module TSV
|
|
112
118
|
[]
|
113
119
|
else
|
114
120
|
parts.values_at *field_positions
|
115
|
-
end.collect{|value| value.split(@sep2, -1)}
|
121
|
+
end.collect{|value| value.nil? ? [] : value.split(@sep2, -1) }
|
116
122
|
[keys, values]
|
117
123
|
end
|
118
124
|
|
@@ -482,6 +488,7 @@ module TSV
|
|
482
488
|
# first line
|
483
489
|
line = self.rescue_first_line
|
484
490
|
|
491
|
+
progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
|
485
492
|
# setup monitor
|
486
493
|
if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
|
487
494
|
size = case
|
@@ -497,8 +504,6 @@ module TSV
|
|
497
504
|
step = monitor[:step] if monitor.include? :step
|
498
505
|
end
|
499
506
|
progress_monitor = Log::ProgressBar.new(size, :desc => desc)
|
500
|
-
else
|
501
|
-
progress_monitor = nil
|
502
507
|
end
|
503
508
|
|
504
509
|
# parser
|
@@ -507,7 +512,8 @@ module TSV
|
|
507
512
|
|
508
513
|
while not line.nil?
|
509
514
|
begin
|
510
|
-
progress_monitor.tick(stream.pos) if progress_monitor
|
515
|
+
#progress_monitor.tick(stream.pos) if progress_monitor
|
516
|
+
progress_monitor.tick if progress_monitor
|
511
517
|
|
512
518
|
raise SKIP_LINE if line.empty?
|
513
519
|
|
@@ -520,8 +526,6 @@ module TSV
|
|
520
526
|
|
521
527
|
yield key, values
|
522
528
|
|
523
|
-
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
524
|
-
|
525
529
|
line = stream.gets
|
526
530
|
|
527
531
|
line_num += 1
|
@@ -541,13 +545,13 @@ module TSV
|
|
541
545
|
raise $!
|
542
546
|
rescue Exception
|
543
547
|
Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
|
544
|
-
stream.abort if stream.respond_to? :abort
|
548
|
+
stream.abort $! if stream.respond_to? :abort
|
545
549
|
raise $!
|
546
550
|
end
|
547
551
|
end
|
548
552
|
|
549
553
|
ensure
|
550
|
-
stream.close
|
554
|
+
stream.close unless stream.closed?
|
551
555
|
stream.join if stream.respond_to? :join
|
552
556
|
end
|
553
557
|
|
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'rbbt/tsv/parser'
|
2
1
|
require 'rbbt/tsv/dumper'
|
3
2
|
module TSV
|
4
3
|
|
@@ -17,56 +16,26 @@ module TSV
|
|
17
16
|
dumper
|
18
17
|
end
|
19
18
|
|
20
|
-
def self.paste_streams(
|
21
|
-
options = Misc.add_defaults options, :sep => "\t", :sort =>
|
22
|
-
sort = Misc.process_options options, :sort
|
23
|
-
|
24
|
-
input_streams = []
|
25
|
-
input_lines = []
|
26
|
-
input_fields = []
|
27
|
-
input_key_fields = []
|
28
|
-
input_options = []
|
29
|
-
|
30
|
-
input_source_streams = inputs.collect do |input|
|
31
|
-
stream = sort ? Misc.sort_stream(input) : TSV.get_stream(input)
|
32
|
-
stream
|
33
|
-
end
|
19
|
+
def self.paste_streams(streams, options = {})
|
20
|
+
options = Misc.add_defaults options, :sep => "\t", :sort => true
|
21
|
+
sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
|
34
22
|
|
35
|
-
input_source_streams.each do |stream|
|
36
|
-
parser = TSV::Parser.new stream, options
|
37
|
-
input_streams << parser.stream
|
38
|
-
input_lines << parser.first_line
|
39
|
-
input_fields << parser.fields
|
40
|
-
input_key_fields << parser.key_field
|
41
|
-
input_options << parser.options
|
42
|
-
end
|
43
23
|
|
44
|
-
key_field = input_key_fields.first
|
45
|
-
fields = input_fields.flatten
|
46
|
-
options = options.merge(input_options.first)
|
47
24
|
|
48
|
-
|
49
|
-
dumper.close_in
|
50
|
-
dumper.close_out
|
51
|
-
header = TSV.header_lines(key_field, fields, options)
|
52
|
-
dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
|
53
|
-
dumper
|
54
|
-
end
|
25
|
+
out = Misc.open_pipe do |sin|
|
55
26
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
27
|
+
streams = streams.collect do |stream|
|
28
|
+
case stream
|
29
|
+
when (defined? Step and Step)
|
30
|
+
stream.grace
|
31
|
+
stream.get_stream || stream.join.path.open
|
32
|
+
when Path
|
33
|
+
stream.open
|
34
|
+
else
|
35
|
+
stream
|
36
|
+
end
|
66
37
|
end
|
67
|
-
end
|
68
38
|
|
69
|
-
out = Misc.open_pipe do |sin|
|
70
39
|
num_streams = streams.length
|
71
40
|
|
72
41
|
streams = streams.collect do |stream|
|
@@ -75,12 +44,13 @@ module TSV
|
|
75
44
|
sorted
|
76
45
|
end if sort
|
77
46
|
|
78
|
-
lines
|
79
|
-
fields
|
80
|
-
sizes
|
81
|
-
key_fields
|
47
|
+
lines = []
|
48
|
+
fields = []
|
49
|
+
sizes = []
|
50
|
+
key_fields = []
|
82
51
|
input_options = []
|
83
|
-
empty
|
52
|
+
empty = []
|
53
|
+
preambles = []
|
84
54
|
|
85
55
|
streams = streams.collect do |stream|
|
86
56
|
parser = TSV::Parser.new stream, options
|
@@ -88,8 +58,9 @@ module TSV
|
|
88
58
|
empty << stream if parser.first_line.nil?
|
89
59
|
key_fields << parser.key_field
|
90
60
|
fields << parser.fields
|
91
|
-
sizes << parser.fields.length
|
61
|
+
sizes << parser.fields.length if parser.fields
|
92
62
|
input_options << parser.options
|
63
|
+
preambles << parser.preamble if TrueClass === preamble and not parser.preamble.empty?
|
93
64
|
|
94
65
|
parser.stream
|
95
66
|
end
|
@@ -98,12 +69,20 @@ module TSV
|
|
98
69
|
fields = fields.compact.flatten
|
99
70
|
options = options.merge(input_options.first)
|
100
71
|
|
101
|
-
|
72
|
+
preamble_txt = case preamble
|
73
|
+
when TrueClass
|
74
|
+
preambles * "\n"
|
75
|
+
when String
|
76
|
+
preamble
|
77
|
+
else
|
78
|
+
nil
|
79
|
+
end
|
80
|
+
|
81
|
+
header = TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
82
|
+
sin.puts header
|
102
83
|
|
103
84
|
empty_pos = empty.collect{|stream| streams.index stream }
|
104
85
|
empty_pos.sort.reverse.each do |i|
|
105
|
-
lines.delete_at i
|
106
|
-
fields.delete_at i
|
107
86
|
key_fields.delete_at i
|
108
87
|
input_options.delete_at i
|
109
88
|
end
|
@@ -114,10 +93,18 @@ module TSV
|
|
114
93
|
keys = []
|
115
94
|
parts = []
|
116
95
|
lines.each_with_index do |line,i|
|
117
|
-
|
118
|
-
|
119
|
-
|
96
|
+
if line.nil?
|
97
|
+
keys[i] = nil
|
98
|
+
parts[i] = nil
|
99
|
+
else
|
100
|
+
vs = line.chomp.split(sep, -1)
|
101
|
+
key, *p = vs
|
102
|
+
keys[i] = key
|
103
|
+
parts[i] = p
|
104
|
+
end
|
105
|
+
sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
120
106
|
end
|
107
|
+
|
121
108
|
last_min = nil
|
122
109
|
while lines.compact.any?
|
123
110
|
min = keys.compact.sort.first
|
@@ -125,38 +112,62 @@ module TSV
|
|
125
112
|
keys.each_with_index do |key,i|
|
126
113
|
case key
|
127
114
|
when min
|
128
|
-
str <<
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
115
|
+
str << parts[i] * sep
|
116
|
+
|
117
|
+
begin
|
118
|
+
line = lines[i] = begin
|
119
|
+
streams[i].gets
|
120
|
+
rescue
|
121
|
+
Log.exception $!
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
if line.nil?
|
125
|
+
stream = streams[i]
|
126
|
+
keys[i] = nil
|
127
|
+
parts[i] = nil
|
128
|
+
else
|
129
|
+
k, *p = line.chomp.split(sep, -1)
|
130
|
+
raise TryAgain if k == keys[i]
|
131
|
+
keys[i] = k
|
132
|
+
parts[i] = p.collect{|e| e.nil? ? "" : e }
|
133
|
+
end
|
134
|
+
rescue TryAgain
|
135
|
+
Log.warn "Skipping repeated key in stream #{i}: #{keys[i]}"
|
136
|
+
retry
|
144
137
|
end
|
145
138
|
else
|
146
|
-
|
139
|
+
if sizes[i] > 0
|
140
|
+
p = sep * (sizes[i]-1)
|
141
|
+
str << p
|
142
|
+
end
|
147
143
|
end
|
148
144
|
end
|
149
145
|
|
150
|
-
|
146
|
+
values = str.inject(nil) do |acc,part|
|
147
|
+
if acc.nil?
|
148
|
+
acc = part.dup
|
149
|
+
else
|
150
|
+
acc << sep << part
|
151
|
+
end
|
152
|
+
acc
|
153
|
+
end
|
154
|
+
text = [min, values] * sep
|
155
|
+
sin.puts text
|
151
156
|
end
|
152
157
|
|
153
158
|
streams.each do |stream|
|
154
159
|
stream.join if stream.respond_to? :join
|
155
160
|
end
|
161
|
+
rescue Aborted
|
162
|
+
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
163
|
+
streams.each do |stream|
|
164
|
+
stream.abort if stream.respond_to? :abort
|
165
|
+
end
|
166
|
+
raise $!
|
156
167
|
rescue Exception
|
157
168
|
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
158
169
|
streams.each do |stream|
|
159
|
-
stream.abort
|
170
|
+
stream.abort if stream.respond_to? :abort
|
160
171
|
end
|
161
172
|
raise $!
|
162
173
|
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -142,11 +142,16 @@ module TSV
|
|
142
142
|
|
143
143
|
|
144
144
|
|
145
|
-
def self.header_lines(key_field, fields, entry_hash =
|
146
|
-
|
145
|
+
def self.header_lines(key_field, fields, entry_hash = nil)
|
146
|
+
if Hash === entry_hash
|
147
|
+
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
148
|
+
preamble = entry_hash[:preamble]
|
149
|
+
end
|
150
|
+
|
151
|
+
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
147
152
|
|
148
153
|
str = ""
|
149
|
-
str <<
|
154
|
+
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
150
155
|
if fields
|
151
156
|
str << "#" << key_field << sep << fields * sep << "\n"
|
152
157
|
end
|