rbbt-util 5.13.37 → 5.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
@@ -162,8 +162,7 @@ module TSV
162
162
  desc = @monitor[:desc] if @monitor.include? :desc
163
163
  step = @monitor[:step] if @monitor.include? :step
164
164
  end
165
- #progress_monitor = Progress::Bar.new(size, 0, step, desc)
166
- progress_monitor = Log::ProgressBar.new(size, :desc => desc)
165
+ progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc)
167
166
  else
168
167
  progress_monitor = nil
169
168
  end
@@ -192,10 +191,10 @@ module TSV
192
191
  when :flat, :single
193
192
  prepare_entity(value, traverser.new_field_names.first, entity_options)
194
193
  end
195
-
196
194
  end
197
195
 
198
196
 
197
+
199
198
  if zipped
200
199
 
201
200
  keys.each_with_index do |k,i|
@@ -224,8 +223,11 @@ module TSV
224
223
  end
225
224
 
226
225
  end
226
+
227
227
  end
228
228
 
229
+ Log::ProgressBar.remove_bar progress_monitor if progress_monitor
230
+
229
231
  [traverser.new_key_field_name, traverser.new_field_names]
230
232
  end
231
233
 
@@ -415,7 +417,7 @@ module TSV
415
417
  case
416
418
  when (Array === method and (key == :key or key_field == key))
417
419
  with_unnamed do
418
- Annotated.purge(method).uniq.each{|key|
420
+ TSV.traverse(Annotated.purge(method).uniq, :bar => true){|key|
419
421
  new[key] = self[key] if invert ^ (self.include? key)
420
422
  }
421
423
  end
@@ -38,7 +38,6 @@ module TSV
38
38
  end
39
39
  end
40
40
  rescue Exception
41
- Log.exception $!
42
41
  nil
43
42
  end
44
43
  end
@@ -186,8 +185,9 @@ module TSV
186
185
  end
187
186
  else
188
187
  options[:monitor] = bar
189
- TSV::Parser.traverse(io, options, &block)
188
+ TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
190
189
  end
190
+ Log::ProgressBar.remove_bar(bar) if bar
191
191
  join.call if join
192
192
  end
193
193
 
@@ -213,7 +213,7 @@ module TSV
213
213
  else
214
214
  obj.traverse(options, &block)
215
215
  end
216
- when IO, File
216
+ when IO, File, StringIO
217
217
  begin
218
218
  if options[:type] == :array
219
219
  traverse_io_array(obj, options, &block)
@@ -392,10 +392,11 @@ module TSV
392
392
  end
393
393
  true
394
394
  rescue Aborted, Interrupt
395
- Log.medium "Aborted storing into #{Misc.fingerprint store}: #{$!.message}"
395
+ Log.medium "Aborted storing into #{Misc.fingerprint store}"
396
396
  stream = obj_stream(store)
397
397
  stream.abort if stream.respond_to? :abort
398
398
  rescue Exception
399
+ Log.medium "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
399
400
  stream = obj_stream(store)
400
401
  stream.abort if stream.respond_to? :abort
401
402
  raise $!
@@ -462,7 +463,7 @@ module TSV
462
463
  thread = Thread.new(Thread.current) do |parent|
463
464
  begin
464
465
  traverse_run(obj, threads, cpus, options, &block)
465
- into.close if into.respond_to? :close
466
+ into.close if into.respond_to?(:close) and not (into.respond_to? :closed? and into.closed?)
466
467
  rescue Exception
467
468
  stream = obj_stream(obj)
468
469
  stream.abort if stream and stream.respond_to? :abort
@@ -557,7 +558,7 @@ module TSV
557
558
  traverse_stream(obj, threads, cpus, options, &block)
558
559
  else
559
560
  traverse_run(obj, threads, cpus, options, &block)
560
- into.close if into.respond_to? :close
561
+ into.close if into.respond_to?(:close) and not (into.respond_to? :closed and into.closed?)
561
562
  end
562
563
 
563
564
  into
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
4
+ attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -13,20 +13,22 @@ module TSV
13
13
 
14
14
  def parse_header(stream)
15
15
  options = {}
16
+ @preamble = []
16
17
 
17
18
  # Get line
18
19
 
19
20
  #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
20
21
  line = stream.gets
21
- raise "Empty content: #{ stream.inspect }" if line.nil?
22
- line = Misc.fixutf8 line
23
- line.chomp!
22
+ return {} if line.nil?
23
+ #raise "Empty content: #{ stream.inspect }" if line.nil?
24
+ line = Misc.fixutf8 line.chomp
24
25
 
25
26
  # Process options line
26
27
 
27
28
  if line and line =~ /^#{@header_hash}: (.*)/
28
- options = Misc.string2hash $1.strip
29
- line = Misc.fixutf8 stream.gets
29
+ options = Misc.string2hash $1.chomp
30
+ line = stream.gets
31
+ line = Misc.fixutf8 line.chomp if line
30
32
  end
31
33
 
32
34
  # Determine separator
@@ -35,16 +37,20 @@ module TSV
35
37
 
36
38
  # Process fields line
37
39
 
40
+ preamble << line if line
38
41
  while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
39
- line.chomp!
40
42
  @fields = line.split(@sep)
41
43
  @key_field = @fields.shift
42
44
  @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
43
45
 
44
46
  #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
45
- line = @header_hash != "" ? Misc.fixutf8(stream.gets) : nil
47
+ line = (@header_hash != "" ? stream.gets : nil)
48
+ line = Misc.fixutf8 line.chomp if line
49
+ preamble << line if line
46
50
  end
47
51
 
52
+ @preamble = preamble[0..-3] * "\n"
53
+
48
54
  line ||= stream.gets
49
55
 
50
56
  @first_line = line
@@ -112,7 +118,7 @@ module TSV
112
118
  []
113
119
  else
114
120
  parts.values_at *field_positions
115
- end.collect{|value| value.split(@sep2, -1)}
121
+ end.collect{|value| value.nil? ? [] : value.split(@sep2, -1) }
116
122
  [keys, values]
117
123
  end
118
124
 
@@ -482,6 +488,7 @@ module TSV
482
488
  # first line
483
489
  line = self.rescue_first_line
484
490
 
491
+ progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
485
492
  # setup monitor
486
493
  if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
487
494
  size = case
@@ -497,8 +504,6 @@ module TSV
497
504
  step = monitor[:step] if monitor.include? :step
498
505
  end
499
506
  progress_monitor = Log::ProgressBar.new(size, :desc => desc)
500
- else
501
- progress_monitor = nil
502
507
  end
503
508
 
504
509
  # parser
@@ -507,7 +512,8 @@ module TSV
507
512
 
508
513
  while not line.nil?
509
514
  begin
510
- progress_monitor.tick(stream.pos) if progress_monitor
515
+ #progress_monitor.tick(stream.pos) if progress_monitor
516
+ progress_monitor.tick if progress_monitor
511
517
 
512
518
  raise SKIP_LINE if line.empty?
513
519
 
@@ -520,8 +526,6 @@ module TSV
520
526
 
521
527
  yield key, values
522
528
 
523
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
524
-
525
529
  line = stream.gets
526
530
 
527
531
  line_num += 1
@@ -541,13 +545,13 @@ module TSV
541
545
  raise $!
542
546
  rescue Exception
543
547
  Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
544
- stream.abort if stream.respond_to? :abort
548
+ stream.abort $! if stream.respond_to? :abort
545
549
  raise $!
546
550
  end
547
551
  end
548
552
 
549
553
  ensure
550
- stream.close
554
+ stream.close unless stream.closed?
551
555
  stream.join if stream.respond_to? :join
552
556
  end
553
557
 
@@ -1,4 +1,3 @@
1
- require 'rbbt/tsv/parser'
2
1
  require 'rbbt/tsv/dumper'
3
2
  module TSV
4
3
 
@@ -17,56 +16,26 @@ module TSV
17
16
  dumper
18
17
  end
19
18
 
20
- def self.paste_streams(inputs, options = {})
21
- options = Misc.add_defaults options, :sep => "\t", :sort => false
22
- sort = Misc.process_options options, :sort
23
-
24
- input_streams = []
25
- input_lines = []
26
- input_fields = []
27
- input_key_fields = []
28
- input_options = []
29
-
30
- input_source_streams = inputs.collect do |input|
31
- stream = sort ? Misc.sort_stream(input) : TSV.get_stream(input)
32
- stream
33
- end
19
+ def self.paste_streams(streams, options = {})
20
+ options = Misc.add_defaults options, :sep => "\t", :sort => true
21
+ sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
34
22
 
35
- input_source_streams.each do |stream|
36
- parser = TSV::Parser.new stream, options
37
- input_streams << parser.stream
38
- input_lines << parser.first_line
39
- input_fields << parser.fields
40
- input_key_fields << parser.key_field
41
- input_options << parser.options
42
- end
43
23
 
44
- key_field = input_key_fields.first
45
- fields = input_fields.flatten
46
- options = options.merge(input_options.first)
47
24
 
48
- dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
49
- dumper.close_in
50
- dumper.close_out
51
- header = TSV.header_lines(key_field, fields, options)
52
- dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
53
- dumper
54
- end
25
+ out = Misc.open_pipe do |sin|
55
26
 
56
- def self.paste_streams(streams, options = {})
57
- options = Misc.add_defaults options, :sep => "\t", :sort => true
58
- sort, sep = Misc.process_options options, :sort, :sep
59
-
60
- streams = streams.collect do |stream|
61
- if defined? Step and Step === stream
62
- stream.grace
63
- stream.get_stream || stream.join.path.open
64
- else
65
- stream
27
+ streams = streams.collect do |stream|
28
+ case stream
29
+ when (defined? Step and Step)
30
+ stream.grace
31
+ stream.get_stream || stream.join.path.open
32
+ when Path
33
+ stream.open
34
+ else
35
+ stream
36
+ end
66
37
  end
67
- end
68
38
 
69
- out = Misc.open_pipe do |sin|
70
39
  num_streams = streams.length
71
40
 
72
41
  streams = streams.collect do |stream|
@@ -75,12 +44,13 @@ module TSV
75
44
  sorted
76
45
  end if sort
77
46
 
78
- lines = []
79
- fields = []
80
- sizes = []
81
- key_fields = []
47
+ lines = []
48
+ fields = []
49
+ sizes = []
50
+ key_fields = []
82
51
  input_options = []
83
- empty = []
52
+ empty = []
53
+ preambles = []
84
54
 
85
55
  streams = streams.collect do |stream|
86
56
  parser = TSV::Parser.new stream, options
@@ -88,8 +58,9 @@ module TSV
88
58
  empty << stream if parser.first_line.nil?
89
59
  key_fields << parser.key_field
90
60
  fields << parser.fields
91
- sizes << parser.fields.length
61
+ sizes << parser.fields.length if parser.fields
92
62
  input_options << parser.options
63
+ preambles << parser.preamble if TrueClass === preamble and not parser.preamble.empty?
93
64
 
94
65
  parser.stream
95
66
  end
@@ -98,12 +69,20 @@ module TSV
98
69
  fields = fields.compact.flatten
99
70
  options = options.merge(input_options.first)
100
71
 
101
- sin.puts TSV.header_lines(key_field, fields, options)
72
+ preamble_txt = case preamble
73
+ when TrueClass
74
+ preambles * "\n"
75
+ when String
76
+ preamble
77
+ else
78
+ nil
79
+ end
80
+
81
+ header = TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
82
+ sin.puts header
102
83
 
103
84
  empty_pos = empty.collect{|stream| streams.index stream }
104
85
  empty_pos.sort.reverse.each do |i|
105
- lines.delete_at i
106
- fields.delete_at i
107
86
  key_fields.delete_at i
108
87
  input_options.delete_at i
109
88
  end
@@ -114,10 +93,18 @@ module TSV
114
93
  keys = []
115
94
  parts = []
116
95
  lines.each_with_index do |line,i|
117
- key, *p = line.strip.split(sep, -1)
118
- keys[i] = key
119
- parts[i] = p
96
+ if line.nil?
97
+ keys[i] = nil
98
+ parts[i] = nil
99
+ else
100
+ vs = line.chomp.split(sep, -1)
101
+ key, *p = vs
102
+ keys[i] = key
103
+ parts[i] = p
104
+ end
105
+ sizes[i] ||= parts[i].length-1 unless parts[i].nil?
120
106
  end
107
+
121
108
  last_min = nil
122
109
  while lines.compact.any?
123
110
  min = keys.compact.sort.first
@@ -125,38 +112,62 @@ module TSV
125
112
  keys.each_with_index do |key,i|
126
113
  case key
127
114
  when min
128
- str << [parts[i] * sep]
129
-
130
- line = lines[i] = begin
131
- streams[i].gets
132
- rescue
133
- Log.exception $!
134
- nil
135
- end
136
- if line.nil?
137
- stream = streams[i]
138
- keys[i] = nil
139
- parts[i] = nil
140
- else
141
- k, *p = line.strip.split(sep, -1)
142
- keys[i] = k
143
- parts[i] = p
115
+ str << parts[i] * sep
116
+
117
+ begin
118
+ line = lines[i] = begin
119
+ streams[i].gets
120
+ rescue
121
+ Log.exception $!
122
+ nil
123
+ end
124
+ if line.nil?
125
+ stream = streams[i]
126
+ keys[i] = nil
127
+ parts[i] = nil
128
+ else
129
+ k, *p = line.chomp.split(sep, -1)
130
+ raise TryAgain if k == keys[i]
131
+ keys[i] = k
132
+ parts[i] = p.collect{|e| e.nil? ? "" : e }
133
+ end
134
+ rescue TryAgain
135
+ Log.warn "Skipping repeated key in stream #{i}: #{keys[i]}"
136
+ retry
144
137
  end
145
138
  else
146
- str << [sep * (sizes[i]-1)] if sizes[i] > 0
139
+ if sizes[i] > 0
140
+ p = sep * (sizes[i]-1)
141
+ str << p
142
+ end
147
143
  end
148
144
  end
149
145
 
150
- sin.puts [min, str*sep] * sep
146
+ values = str.inject(nil) do |acc,part|
147
+ if acc.nil?
148
+ acc = part.dup
149
+ else
150
+ acc << sep << part
151
+ end
152
+ acc
153
+ end
154
+ text = [min, values] * sep
155
+ sin.puts text
151
156
  end
152
157
 
153
158
  streams.each do |stream|
154
159
  stream.join if stream.respond_to? :join
155
160
  end
161
+ rescue Aborted
162
+ Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
163
+ streams.each do |stream|
164
+ stream.abort if stream.respond_to? :abort
165
+ end
166
+ raise $!
156
167
  rescue Exception
157
168
  Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
158
169
  streams.each do |stream|
159
- stream.abort
170
+ stream.abort if stream.respond_to? :abort
160
171
  end
161
172
  raise $!
162
173
  end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -142,11 +142,16 @@ module TSV
142
142
 
143
143
 
144
144
 
145
- def self.header_lines(key_field, fields, entry_hash = {})
146
- sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
145
+ def self.header_lines(key_field, fields, entry_hash = nil)
146
+ if Hash === entry_hash
147
+ sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
148
+ preamble = entry_hash[:preamble]
149
+ end
150
+
151
+ preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
147
152
 
148
153
  str = ""
149
- str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
154
+ str << preamble.strip << "\n" if preamble and not preamble.empty?
150
155
  if fields
151
156
  str << "#" << key_field << sep << fields * sep << "\n"
152
157
  end