rbbt-util 5.13.37 → 5.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -162,8 +162,7 @@ module TSV
|
|
162
162
|
desc = @monitor[:desc] if @monitor.include? :desc
|
163
163
|
step = @monitor[:step] if @monitor.include? :step
|
164
164
|
end
|
165
|
-
|
166
|
-
progress_monitor = Log::ProgressBar.new(size, :desc => desc)
|
165
|
+
progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc)
|
167
166
|
else
|
168
167
|
progress_monitor = nil
|
169
168
|
end
|
@@ -192,10 +191,10 @@ module TSV
|
|
192
191
|
when :flat, :single
|
193
192
|
prepare_entity(value, traverser.new_field_names.first, entity_options)
|
194
193
|
end
|
195
|
-
|
196
194
|
end
|
197
195
|
|
198
196
|
|
197
|
+
|
199
198
|
if zipped
|
200
199
|
|
201
200
|
keys.each_with_index do |k,i|
|
@@ -224,8 +223,11 @@ module TSV
|
|
224
223
|
end
|
225
224
|
|
226
225
|
end
|
226
|
+
|
227
227
|
end
|
228
228
|
|
229
|
+
Log::ProgressBar.remove_bar progress_monitor if progress_monitor
|
230
|
+
|
229
231
|
[traverser.new_key_field_name, traverser.new_field_names]
|
230
232
|
end
|
231
233
|
|
@@ -415,7 +417,7 @@ module TSV
|
|
415
417
|
case
|
416
418
|
when (Array === method and (key == :key or key_field == key))
|
417
419
|
with_unnamed do
|
418
|
-
Annotated.purge(method).uniq
|
420
|
+
TSV.traverse(Annotated.purge(method).uniq, :bar => true){|key|
|
419
421
|
new[key] = self[key] if invert ^ (self.include? key)
|
420
422
|
}
|
421
423
|
end
|
@@ -38,7 +38,6 @@ module TSV
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
rescue Exception
|
41
|
-
Log.exception $!
|
42
41
|
nil
|
43
42
|
end
|
44
43
|
end
|
@@ -186,8 +185,9 @@ module TSV
|
|
186
185
|
end
|
187
186
|
else
|
188
187
|
options[:monitor] = bar
|
189
|
-
TSV::Parser.traverse(io, options, &block)
|
188
|
+
TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
|
190
189
|
end
|
190
|
+
Log::ProgressBar.remove_bar(bar) if bar
|
191
191
|
join.call if join
|
192
192
|
end
|
193
193
|
|
@@ -213,7 +213,7 @@ module TSV
|
|
213
213
|
else
|
214
214
|
obj.traverse(options, &block)
|
215
215
|
end
|
216
|
-
when IO, File
|
216
|
+
when IO, File, StringIO
|
217
217
|
begin
|
218
218
|
if options[:type] == :array
|
219
219
|
traverse_io_array(obj, options, &block)
|
@@ -392,10 +392,11 @@ module TSV
|
|
392
392
|
end
|
393
393
|
true
|
394
394
|
rescue Aborted, Interrupt
|
395
|
-
Log.medium "Aborted storing into #{Misc.fingerprint store}
|
395
|
+
Log.medium "Aborted storing into #{Misc.fingerprint store}"
|
396
396
|
stream = obj_stream(store)
|
397
397
|
stream.abort if stream.respond_to? :abort
|
398
398
|
rescue Exception
|
399
|
+
Log.medium "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
|
399
400
|
stream = obj_stream(store)
|
400
401
|
stream.abort if stream.respond_to? :abort
|
401
402
|
raise $!
|
@@ -462,7 +463,7 @@ module TSV
|
|
462
463
|
thread = Thread.new(Thread.current) do |parent|
|
463
464
|
begin
|
464
465
|
traverse_run(obj, threads, cpus, options, &block)
|
465
|
-
into.close if into.respond_to? :
|
466
|
+
into.close if into.respond_to?(:close) and not (into.respond_to? :closed? and into.closed?)
|
466
467
|
rescue Exception
|
467
468
|
stream = obj_stream(obj)
|
468
469
|
stream.abort if stream and stream.respond_to? :abort
|
@@ -557,7 +558,7 @@ module TSV
|
|
557
558
|
traverse_stream(obj, threads, cpus, options, &block)
|
558
559
|
else
|
559
560
|
traverse_run(obj, threads, cpus, options, &block)
|
560
|
-
into.close if into.respond_to? :
|
561
|
+
into.close if into.respond_to?(:close) and not (into.respond_to? :closed and into.closed?)
|
561
562
|
end
|
562
563
|
|
563
564
|
into
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
module TSV
|
3
3
|
class Parser
|
4
|
-
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
|
4
|
+
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
|
5
5
|
|
6
6
|
class SKIP_LINE < Exception; end
|
7
7
|
class END_PARSING < Exception; end
|
@@ -13,20 +13,22 @@ module TSV
|
|
13
13
|
|
14
14
|
def parse_header(stream)
|
15
15
|
options = {}
|
16
|
+
@preamble = []
|
16
17
|
|
17
18
|
# Get line
|
18
19
|
|
19
20
|
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
20
21
|
line = stream.gets
|
21
|
-
|
22
|
-
|
23
|
-
line.chomp
|
22
|
+
return {} if line.nil?
|
23
|
+
#raise "Empty content: #{ stream.inspect }" if line.nil?
|
24
|
+
line = Misc.fixutf8 line.chomp
|
24
25
|
|
25
26
|
# Process options line
|
26
27
|
|
27
28
|
if line and line =~ /^#{@header_hash}: (.*)/
|
28
|
-
options = Misc.string2hash $1.
|
29
|
-
line =
|
29
|
+
options = Misc.string2hash $1.chomp
|
30
|
+
line = stream.gets
|
31
|
+
line = Misc.fixutf8 line.chomp if line
|
30
32
|
end
|
31
33
|
|
32
34
|
# Determine separator
|
@@ -35,16 +37,20 @@ module TSV
|
|
35
37
|
|
36
38
|
# Process fields line
|
37
39
|
|
40
|
+
preamble << line if line
|
38
41
|
while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
|
39
|
-
line.chomp!
|
40
42
|
@fields = line.split(@sep)
|
41
43
|
@key_field = @fields.shift
|
42
44
|
@key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
|
43
45
|
|
44
46
|
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
45
|
-
line = @header_hash != "" ?
|
47
|
+
line = (@header_hash != "" ? stream.gets : nil)
|
48
|
+
line = Misc.fixutf8 line.chomp if line
|
49
|
+
preamble << line if line
|
46
50
|
end
|
47
51
|
|
52
|
+
@preamble = preamble[0..-3] * "\n"
|
53
|
+
|
48
54
|
line ||= stream.gets
|
49
55
|
|
50
56
|
@first_line = line
|
@@ -112,7 +118,7 @@ module TSV
|
|
112
118
|
[]
|
113
119
|
else
|
114
120
|
parts.values_at *field_positions
|
115
|
-
end.collect{|value| value.split(@sep2, -1)}
|
121
|
+
end.collect{|value| value.nil? ? [] : value.split(@sep2, -1) }
|
116
122
|
[keys, values]
|
117
123
|
end
|
118
124
|
|
@@ -482,6 +488,7 @@ module TSV
|
|
482
488
|
# first line
|
483
489
|
line = self.rescue_first_line
|
484
490
|
|
491
|
+
progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
|
485
492
|
# setup monitor
|
486
493
|
if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
|
487
494
|
size = case
|
@@ -497,8 +504,6 @@ module TSV
|
|
497
504
|
step = monitor[:step] if monitor.include? :step
|
498
505
|
end
|
499
506
|
progress_monitor = Log::ProgressBar.new(size, :desc => desc)
|
500
|
-
else
|
501
|
-
progress_monitor = nil
|
502
507
|
end
|
503
508
|
|
504
509
|
# parser
|
@@ -507,7 +512,8 @@ module TSV
|
|
507
512
|
|
508
513
|
while not line.nil?
|
509
514
|
begin
|
510
|
-
progress_monitor.tick(stream.pos) if progress_monitor
|
515
|
+
#progress_monitor.tick(stream.pos) if progress_monitor
|
516
|
+
progress_monitor.tick if progress_monitor
|
511
517
|
|
512
518
|
raise SKIP_LINE if line.empty?
|
513
519
|
|
@@ -520,8 +526,6 @@ module TSV
|
|
520
526
|
|
521
527
|
yield key, values
|
522
528
|
|
523
|
-
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
524
|
-
|
525
529
|
line = stream.gets
|
526
530
|
|
527
531
|
line_num += 1
|
@@ -541,13 +545,13 @@ module TSV
|
|
541
545
|
raise $!
|
542
546
|
rescue Exception
|
543
547
|
Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
|
544
|
-
stream.abort if stream.respond_to? :abort
|
548
|
+
stream.abort $! if stream.respond_to? :abort
|
545
549
|
raise $!
|
546
550
|
end
|
547
551
|
end
|
548
552
|
|
549
553
|
ensure
|
550
|
-
stream.close
|
554
|
+
stream.close unless stream.closed?
|
551
555
|
stream.join if stream.respond_to? :join
|
552
556
|
end
|
553
557
|
|
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'rbbt/tsv/parser'
|
2
1
|
require 'rbbt/tsv/dumper'
|
3
2
|
module TSV
|
4
3
|
|
@@ -17,56 +16,26 @@ module TSV
|
|
17
16
|
dumper
|
18
17
|
end
|
19
18
|
|
20
|
-
def self.paste_streams(
|
21
|
-
options = Misc.add_defaults options, :sep => "\t", :sort =>
|
22
|
-
sort = Misc.process_options options, :sort
|
23
|
-
|
24
|
-
input_streams = []
|
25
|
-
input_lines = []
|
26
|
-
input_fields = []
|
27
|
-
input_key_fields = []
|
28
|
-
input_options = []
|
29
|
-
|
30
|
-
input_source_streams = inputs.collect do |input|
|
31
|
-
stream = sort ? Misc.sort_stream(input) : TSV.get_stream(input)
|
32
|
-
stream
|
33
|
-
end
|
19
|
+
def self.paste_streams(streams, options = {})
|
20
|
+
options = Misc.add_defaults options, :sep => "\t", :sort => true
|
21
|
+
sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
|
34
22
|
|
35
|
-
input_source_streams.each do |stream|
|
36
|
-
parser = TSV::Parser.new stream, options
|
37
|
-
input_streams << parser.stream
|
38
|
-
input_lines << parser.first_line
|
39
|
-
input_fields << parser.fields
|
40
|
-
input_key_fields << parser.key_field
|
41
|
-
input_options << parser.options
|
42
|
-
end
|
43
23
|
|
44
|
-
key_field = input_key_fields.first
|
45
|
-
fields = input_fields.flatten
|
46
|
-
options = options.merge(input_options.first)
|
47
24
|
|
48
|
-
|
49
|
-
dumper.close_in
|
50
|
-
dumper.close_out
|
51
|
-
header = TSV.header_lines(key_field, fields, options)
|
52
|
-
dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
|
53
|
-
dumper
|
54
|
-
end
|
25
|
+
out = Misc.open_pipe do |sin|
|
55
26
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
27
|
+
streams = streams.collect do |stream|
|
28
|
+
case stream
|
29
|
+
when (defined? Step and Step)
|
30
|
+
stream.grace
|
31
|
+
stream.get_stream || stream.join.path.open
|
32
|
+
when Path
|
33
|
+
stream.open
|
34
|
+
else
|
35
|
+
stream
|
36
|
+
end
|
66
37
|
end
|
67
|
-
end
|
68
38
|
|
69
|
-
out = Misc.open_pipe do |sin|
|
70
39
|
num_streams = streams.length
|
71
40
|
|
72
41
|
streams = streams.collect do |stream|
|
@@ -75,12 +44,13 @@ module TSV
|
|
75
44
|
sorted
|
76
45
|
end if sort
|
77
46
|
|
78
|
-
lines
|
79
|
-
fields
|
80
|
-
sizes
|
81
|
-
key_fields
|
47
|
+
lines = []
|
48
|
+
fields = []
|
49
|
+
sizes = []
|
50
|
+
key_fields = []
|
82
51
|
input_options = []
|
83
|
-
empty
|
52
|
+
empty = []
|
53
|
+
preambles = []
|
84
54
|
|
85
55
|
streams = streams.collect do |stream|
|
86
56
|
parser = TSV::Parser.new stream, options
|
@@ -88,8 +58,9 @@ module TSV
|
|
88
58
|
empty << stream if parser.first_line.nil?
|
89
59
|
key_fields << parser.key_field
|
90
60
|
fields << parser.fields
|
91
|
-
sizes << parser.fields.length
|
61
|
+
sizes << parser.fields.length if parser.fields
|
92
62
|
input_options << parser.options
|
63
|
+
preambles << parser.preamble if TrueClass === preamble and not parser.preamble.empty?
|
93
64
|
|
94
65
|
parser.stream
|
95
66
|
end
|
@@ -98,12 +69,20 @@ module TSV
|
|
98
69
|
fields = fields.compact.flatten
|
99
70
|
options = options.merge(input_options.first)
|
100
71
|
|
101
|
-
|
72
|
+
preamble_txt = case preamble
|
73
|
+
when TrueClass
|
74
|
+
preambles * "\n"
|
75
|
+
when String
|
76
|
+
preamble
|
77
|
+
else
|
78
|
+
nil
|
79
|
+
end
|
80
|
+
|
81
|
+
header = TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
82
|
+
sin.puts header
|
102
83
|
|
103
84
|
empty_pos = empty.collect{|stream| streams.index stream }
|
104
85
|
empty_pos.sort.reverse.each do |i|
|
105
|
-
lines.delete_at i
|
106
|
-
fields.delete_at i
|
107
86
|
key_fields.delete_at i
|
108
87
|
input_options.delete_at i
|
109
88
|
end
|
@@ -114,10 +93,18 @@ module TSV
|
|
114
93
|
keys = []
|
115
94
|
parts = []
|
116
95
|
lines.each_with_index do |line,i|
|
117
|
-
|
118
|
-
|
119
|
-
|
96
|
+
if line.nil?
|
97
|
+
keys[i] = nil
|
98
|
+
parts[i] = nil
|
99
|
+
else
|
100
|
+
vs = line.chomp.split(sep, -1)
|
101
|
+
key, *p = vs
|
102
|
+
keys[i] = key
|
103
|
+
parts[i] = p
|
104
|
+
end
|
105
|
+
sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
120
106
|
end
|
107
|
+
|
121
108
|
last_min = nil
|
122
109
|
while lines.compact.any?
|
123
110
|
min = keys.compact.sort.first
|
@@ -125,38 +112,62 @@ module TSV
|
|
125
112
|
keys.each_with_index do |key,i|
|
126
113
|
case key
|
127
114
|
when min
|
128
|
-
str <<
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
115
|
+
str << parts[i] * sep
|
116
|
+
|
117
|
+
begin
|
118
|
+
line = lines[i] = begin
|
119
|
+
streams[i].gets
|
120
|
+
rescue
|
121
|
+
Log.exception $!
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
if line.nil?
|
125
|
+
stream = streams[i]
|
126
|
+
keys[i] = nil
|
127
|
+
parts[i] = nil
|
128
|
+
else
|
129
|
+
k, *p = line.chomp.split(sep, -1)
|
130
|
+
raise TryAgain if k == keys[i]
|
131
|
+
keys[i] = k
|
132
|
+
parts[i] = p.collect{|e| e.nil? ? "" : e }
|
133
|
+
end
|
134
|
+
rescue TryAgain
|
135
|
+
Log.warn "Skipping repeated key in stream #{i}: #{keys[i]}"
|
136
|
+
retry
|
144
137
|
end
|
145
138
|
else
|
146
|
-
|
139
|
+
if sizes[i] > 0
|
140
|
+
p = sep * (sizes[i]-1)
|
141
|
+
str << p
|
142
|
+
end
|
147
143
|
end
|
148
144
|
end
|
149
145
|
|
150
|
-
|
146
|
+
values = str.inject(nil) do |acc,part|
|
147
|
+
if acc.nil?
|
148
|
+
acc = part.dup
|
149
|
+
else
|
150
|
+
acc << sep << part
|
151
|
+
end
|
152
|
+
acc
|
153
|
+
end
|
154
|
+
text = [min, values] * sep
|
155
|
+
sin.puts text
|
151
156
|
end
|
152
157
|
|
153
158
|
streams.each do |stream|
|
154
159
|
stream.join if stream.respond_to? :join
|
155
160
|
end
|
161
|
+
rescue Aborted
|
162
|
+
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
163
|
+
streams.each do |stream|
|
164
|
+
stream.abort if stream.respond_to? :abort
|
165
|
+
end
|
166
|
+
raise $!
|
156
167
|
rescue Exception
|
157
168
|
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
158
169
|
streams.each do |stream|
|
159
|
-
stream.abort
|
170
|
+
stream.abort if stream.respond_to? :abort
|
160
171
|
end
|
161
172
|
raise $!
|
162
173
|
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -142,11 +142,16 @@ module TSV
|
|
142
142
|
|
143
143
|
|
144
144
|
|
145
|
-
def self.header_lines(key_field, fields, entry_hash =
|
146
|
-
|
145
|
+
def self.header_lines(key_field, fields, entry_hash = nil)
|
146
|
+
if Hash === entry_hash
|
147
|
+
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
148
|
+
preamble = entry_hash[:preamble]
|
149
|
+
end
|
150
|
+
|
151
|
+
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
147
152
|
|
148
153
|
str = ""
|
149
|
-
str <<
|
154
|
+
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
150
155
|
if fields
|
151
156
|
str << "#" << key_field << sep << fields * sep << "\n"
|
152
157
|
end
|