rbbt-util 5.13.37 → 5.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
data/lib/rbbt/tsv/manipulate.rb CHANGED
@@ -162,8 +162,7 @@ module TSV
162
162
  desc = @monitor[:desc] if @monitor.include? :desc
163
163
  step = @monitor[:step] if @monitor.include? :step
164
164
  end
165
- #progress_monitor = Progress::Bar.new(size, 0, step, desc)
166
- progress_monitor = Log::ProgressBar.new(size, :desc => desc)
165
+ progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc)
167
166
  else
168
167
  progress_monitor = nil
169
168
  end
@@ -192,10 +191,10 @@ module TSV
192
191
  when :flat, :single
193
192
  prepare_entity(value, traverser.new_field_names.first, entity_options)
194
193
  end
195
-
196
194
  end
197
195
 
198
196
 
197
+
199
198
  if zipped
200
199
 
201
200
  keys.each_with_index do |k,i|
@@ -224,8 +223,11 @@ module TSV
224
223
  end
225
224
 
226
225
  end
226
+
227
227
  end
228
228
 
229
+ Log::ProgressBar.remove_bar progress_monitor if progress_monitor
230
+
229
231
  [traverser.new_key_field_name, traverser.new_field_names]
230
232
  end
231
233
 
@@ -415,7 +417,7 @@ module TSV
415
417
  case
416
418
  when (Array === method and (key == :key or key_field == key))
417
419
  with_unnamed do
418
- Annotated.purge(method).uniq.each{|key|
420
+ TSV.traverse(Annotated.purge(method).uniq, :bar => true){|key|
419
421
  new[key] = self[key] if invert ^ (self.include? key)
420
422
  }
421
423
  end
data/lib/rbbt/tsv/parallel/traverse.rb CHANGED
@@ -38,7 +38,6 @@ module TSV
38
38
  end
39
39
  end
40
40
  rescue Exception
41
- Log.exception $!
42
41
  nil
43
42
  end
44
43
  end
@@ -186,8 +185,9 @@ module TSV
186
185
  end
187
186
  else
188
187
  options[:monitor] = bar
189
- TSV::Parser.traverse(io, options, &block)
188
+ TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
190
189
  end
190
+ Log::ProgressBar.remove_bar(bar) if bar
191
191
  join.call if join
192
192
  end
193
193
 
@@ -213,7 +213,7 @@ module TSV
213
213
  else
214
214
  obj.traverse(options, &block)
215
215
  end
216
- when IO, File
216
+ when IO, File, StringIO
217
217
  begin
218
218
  if options[:type] == :array
219
219
  traverse_io_array(obj, options, &block)
@@ -392,10 +392,11 @@ module TSV
392
392
  end
393
393
  true
394
394
  rescue Aborted, Interrupt
395
- Log.medium "Aborted storing into #{Misc.fingerprint store}: #{$!.message}"
395
+ Log.medium "Aborted storing into #{Misc.fingerprint store}"
396
396
  stream = obj_stream(store)
397
397
  stream.abort if stream.respond_to? :abort
398
398
  rescue Exception
399
+ Log.medium "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
399
400
  stream = obj_stream(store)
400
401
  stream.abort if stream.respond_to? :abort
401
402
  raise $!
@@ -462,7 +463,7 @@ module TSV
462
463
  thread = Thread.new(Thread.current) do |parent|
463
464
  begin
464
465
  traverse_run(obj, threads, cpus, options, &block)
465
- into.close if into.respond_to? :close
466
+ into.close if into.respond_to?(:close) and not (into.respond_to? :closed? and into.closed?)
466
467
  rescue Exception
467
468
  stream = obj_stream(obj)
468
469
  stream.abort if stream and stream.respond_to? :abort
@@ -557,7 +558,7 @@ module TSV
557
558
  traverse_stream(obj, threads, cpus, options, &block)
558
559
  else
559
560
  traverse_run(obj, threads, cpus, options, &block)
560
- into.close if into.respond_to? :close
561
+ into.close if into.respond_to?(:close) and not (into.respond_to? :closed and into.closed?)
561
562
  end
562
563
 
563
564
  into
data/lib/rbbt/tsv/parser.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
4
+ attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -13,20 +13,22 @@ module TSV
13
13
 
14
14
  def parse_header(stream)
15
15
  options = {}
16
+ @preamble = []
16
17
 
17
18
  # Get line
18
19
 
19
20
  #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
20
21
  line = stream.gets
21
- raise "Empty content: #{ stream.inspect }" if line.nil?
22
- line = Misc.fixutf8 line
23
- line.chomp!
22
+ return {} if line.nil?
23
+ #raise "Empty content: #{ stream.inspect }" if line.nil?
24
+ line = Misc.fixutf8 line.chomp
24
25
 
25
26
  # Process options line
26
27
 
27
28
  if line and line =~ /^#{@header_hash}: (.*)/
28
- options = Misc.string2hash $1.strip
29
- line = Misc.fixutf8 stream.gets
29
+ options = Misc.string2hash $1.chomp
30
+ line = stream.gets
31
+ line = Misc.fixutf8 line.chomp if line
30
32
  end
31
33
 
32
34
  # Determine separator
@@ -35,16 +37,20 @@ module TSV
35
37
 
36
38
  # Process fields line
37
39
 
40
+ preamble << line if line
38
41
  while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
39
- line.chomp!
40
42
  @fields = line.split(@sep)
41
43
  @key_field = @fields.shift
42
44
  @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
43
45
 
44
46
  #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
45
- line = @header_hash != "" ? Misc.fixutf8(stream.gets) : nil
47
+ line = (@header_hash != "" ? stream.gets : nil)
48
+ line = Misc.fixutf8 line.chomp if line
49
+ preamble << line if line
46
50
  end
47
51
 
52
+ @preamble = preamble[0..-3] * "\n"
53
+
48
54
  line ||= stream.gets
49
55
 
50
56
  @first_line = line
@@ -112,7 +118,7 @@ module TSV
112
118
  []
113
119
  else
114
120
  parts.values_at *field_positions
115
- end.collect{|value| value.split(@sep2, -1)}
121
+ end.collect{|value| value.nil? ? [] : value.split(@sep2, -1) }
116
122
  [keys, values]
117
123
  end
118
124
 
@@ -482,6 +488,7 @@ module TSV
482
488
  # first line
483
489
  line = self.rescue_first_line
484
490
 
491
+ progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
485
492
  # setup monitor
486
493
  if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
487
494
  size = case
@@ -497,8 +504,6 @@ module TSV
497
504
  step = monitor[:step] if monitor.include? :step
498
505
  end
499
506
  progress_monitor = Log::ProgressBar.new(size, :desc => desc)
500
- else
501
- progress_monitor = nil
502
507
  end
503
508
 
504
509
  # parser
@@ -507,7 +512,8 @@ module TSV
507
512
 
508
513
  while not line.nil?
509
514
  begin
510
- progress_monitor.tick(stream.pos) if progress_monitor
515
+ #progress_monitor.tick(stream.pos) if progress_monitor
516
+ progress_monitor.tick if progress_monitor
511
517
 
512
518
  raise SKIP_LINE if line.empty?
513
519
 
@@ -520,8 +526,6 @@ module TSV
520
526
 
521
527
  yield key, values
522
528
 
523
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
524
-
525
529
  line = stream.gets
526
530
 
527
531
  line_num += 1
@@ -541,13 +545,13 @@ module TSV
541
545
  raise $!
542
546
  rescue Exception
543
547
  Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
544
- stream.abort if stream.respond_to? :abort
548
+ stream.abort $! if stream.respond_to? :abort
545
549
  raise $!
546
550
  end
547
551
  end
548
552
 
549
553
  ensure
550
- stream.close
554
+ stream.close unless stream.closed?
551
555
  stream.join if stream.respond_to? :join
552
556
  end
553
557
 
data/lib/rbbt/tsv/stream.rb CHANGED
@@ -1,4 +1,3 @@
1
- require 'rbbt/tsv/parser'
2
1
  require 'rbbt/tsv/dumper'
3
2
  module TSV
4
3
 
@@ -17,56 +16,26 @@ module TSV
17
16
  dumper
18
17
  end
19
18
 
20
- def self.paste_streams(inputs, options = {})
21
- options = Misc.add_defaults options, :sep => "\t", :sort => false
22
- sort = Misc.process_options options, :sort
23
-
24
- input_streams = []
25
- input_lines = []
26
- input_fields = []
27
- input_key_fields = []
28
- input_options = []
29
-
30
- input_source_streams = inputs.collect do |input|
31
- stream = sort ? Misc.sort_stream(input) : TSV.get_stream(input)
32
- stream
33
- end
19
+ def self.paste_streams(streams, options = {})
20
+ options = Misc.add_defaults options, :sep => "\t", :sort => true
21
+ sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
34
22
 
35
- input_source_streams.each do |stream|
36
- parser = TSV::Parser.new stream, options
37
- input_streams << parser.stream
38
- input_lines << parser.first_line
39
- input_fields << parser.fields
40
- input_key_fields << parser.key_field
41
- input_options << parser.options
42
- end
43
23
 
44
- key_field = input_key_fields.first
45
- fields = input_fields.flatten
46
- options = options.merge(input_options.first)
47
24
 
48
- dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
49
- dumper.close_in
50
- dumper.close_out
51
- header = TSV.header_lines(key_field, fields, options)
52
- dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
53
- dumper
54
- end
25
+ out = Misc.open_pipe do |sin|
55
26
 
56
- def self.paste_streams(streams, options = {})
57
- options = Misc.add_defaults options, :sep => "\t", :sort => true
58
- sort, sep = Misc.process_options options, :sort, :sep
59
-
60
- streams = streams.collect do |stream|
61
- if defined? Step and Step === stream
62
- stream.grace
63
- stream.get_stream || stream.join.path.open
64
- else
65
- stream
27
+ streams = streams.collect do |stream|
28
+ case stream
29
+ when (defined? Step and Step)
30
+ stream.grace
31
+ stream.get_stream || stream.join.path.open
32
+ when Path
33
+ stream.open
34
+ else
35
+ stream
36
+ end
66
37
  end
67
- end
68
38
 
69
- out = Misc.open_pipe do |sin|
70
39
  num_streams = streams.length
71
40
 
72
41
  streams = streams.collect do |stream|
@@ -75,12 +44,13 @@ module TSV
75
44
  sorted
76
45
  end if sort
77
46
 
78
- lines = []
79
- fields = []
80
- sizes = []
81
- key_fields = []
47
+ lines = []
48
+ fields = []
49
+ sizes = []
50
+ key_fields = []
82
51
  input_options = []
83
- empty = []
52
+ empty = []
53
+ preambles = []
84
54
 
85
55
  streams = streams.collect do |stream|
86
56
  parser = TSV::Parser.new stream, options
@@ -88,8 +58,9 @@ module TSV
88
58
  empty << stream if parser.first_line.nil?
89
59
  key_fields << parser.key_field
90
60
  fields << parser.fields
91
- sizes << parser.fields.length
61
+ sizes << parser.fields.length if parser.fields
92
62
  input_options << parser.options
63
+ preambles << parser.preamble if TrueClass === preamble and not parser.preamble.empty?
93
64
 
94
65
  parser.stream
95
66
  end
@@ -98,12 +69,20 @@ module TSV
98
69
  fields = fields.compact.flatten
99
70
  options = options.merge(input_options.first)
100
71
 
101
- sin.puts TSV.header_lines(key_field, fields, options)
72
+ preamble_txt = case preamble
73
+ when TrueClass
74
+ preambles * "\n"
75
+ when String
76
+ preamble
77
+ else
78
+ nil
79
+ end
80
+
81
+ header = TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
82
+ sin.puts header
102
83
 
103
84
  empty_pos = empty.collect{|stream| streams.index stream }
104
85
  empty_pos.sort.reverse.each do |i|
105
- lines.delete_at i
106
- fields.delete_at i
107
86
  key_fields.delete_at i
108
87
  input_options.delete_at i
109
88
  end
@@ -114,10 +93,18 @@ module TSV
114
93
  keys = []
115
94
  parts = []
116
95
  lines.each_with_index do |line,i|
117
- key, *p = line.strip.split(sep, -1)
118
- keys[i] = key
119
- parts[i] = p
96
+ if line.nil?
97
+ keys[i] = nil
98
+ parts[i] = nil
99
+ else
100
+ vs = line.chomp.split(sep, -1)
101
+ key, *p = vs
102
+ keys[i] = key
103
+ parts[i] = p
104
+ end
105
+ sizes[i] ||= parts[i].length-1 unless parts[i].nil?
120
106
  end
107
+
121
108
  last_min = nil
122
109
  while lines.compact.any?
123
110
  min = keys.compact.sort.first
@@ -125,38 +112,62 @@ module TSV
125
112
  keys.each_with_index do |key,i|
126
113
  case key
127
114
  when min
128
- str << [parts[i] * sep]
129
-
130
- line = lines[i] = begin
131
- streams[i].gets
132
- rescue
133
- Log.exception $!
134
- nil
135
- end
136
- if line.nil?
137
- stream = streams[i]
138
- keys[i] = nil
139
- parts[i] = nil
140
- else
141
- k, *p = line.strip.split(sep, -1)
142
- keys[i] = k
143
- parts[i] = p
115
+ str << parts[i] * sep
116
+
117
+ begin
118
+ line = lines[i] = begin
119
+ streams[i].gets
120
+ rescue
121
+ Log.exception $!
122
+ nil
123
+ end
124
+ if line.nil?
125
+ stream = streams[i]
126
+ keys[i] = nil
127
+ parts[i] = nil
128
+ else
129
+ k, *p = line.chomp.split(sep, -1)
130
+ raise TryAgain if k == keys[i]
131
+ keys[i] = k
132
+ parts[i] = p.collect{|e| e.nil? ? "" : e }
133
+ end
134
+ rescue TryAgain
135
+ Log.warn "Skipping repeated key in stream #{i}: #{keys[i]}"
136
+ retry
144
137
  end
145
138
  else
146
- str << [sep * (sizes[i]-1)] if sizes[i] > 0
139
+ if sizes[i] > 0
140
+ p = sep * (sizes[i]-1)
141
+ str << p
142
+ end
147
143
  end
148
144
  end
149
145
 
150
- sin.puts [min, str*sep] * sep
146
+ values = str.inject(nil) do |acc,part|
147
+ if acc.nil?
148
+ acc = part.dup
149
+ else
150
+ acc << sep << part
151
+ end
152
+ acc
153
+ end
154
+ text = [min, values] * sep
155
+ sin.puts text
151
156
  end
152
157
 
153
158
  streams.each do |stream|
154
159
  stream.join if stream.respond_to? :join
155
160
  end
161
+ rescue Aborted
162
+ Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
163
+ streams.each do |stream|
164
+ stream.abort if stream.respond_to? :abort
165
+ end
166
+ raise $!
156
167
  rescue Exception
157
168
  Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
158
169
  streams.each do |stream|
159
- stream.abort
170
+ stream.abort if stream.respond_to? :abort
160
171
  end
161
172
  raise $!
162
173
  end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -142,11 +142,16 @@ module TSV
142
142
 
143
143
 
144
144
 
145
- def self.header_lines(key_field, fields, entry_hash = {})
146
- sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
145
+ def self.header_lines(key_field, fields, entry_hash = nil)
146
+ if Hash === entry_hash
147
+ sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
148
+ preamble = entry_hash[:preamble]
149
+ end
150
+
151
+ preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
147
152
 
148
153
  str = ""
149
- str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
154
+ str << preamble.strip << "\n" if preamble and not preamble.empty?
150
155
  if fields
151
156
  str << "#" << key_field << sep << fields * sep << "\n"
152
157
  end