rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,258 +1,257 @@
1
- require 'rbbt/tsv/dumper'
2
1
  module TSV
3
2
 
4
- def self.collapse_stream(input, options = {}, &block)
5
- options = Misc.add_defaults options, :sep => "\t", :header_hash => '#', :uniq => true
6
- input_stream = TSV.get_stream input
3
+ #def self.collapse_stream(input, options = {}, &block)
4
+ # options = IndiferentHash.add_defaults options, :sep => "\t", :header_hash => '#', :uniq => true
5
+ # input_stream = TSV.get_stream input
7
6
 
8
- header_hash = options[:header_hash]
9
- cmd_args = options[:uniq] ? "-u" : nil
7
+ # header_hash = options[:header_hash]
8
+ # cmd_args = options[:uniq] ? "-u" : nil
10
9
 
11
- sorted_input_stream = Misc.sort_stream input_stream, header_hash, cmd_args
10
+ # sorted_input_stream = Open.sort_stream input_stream, header_hash, cmd_args
12
11
 
13
- parser = TSV::Parser.new(sorted_input_stream, options.dup)
14
- dumper = TSV::Dumper.new parser
15
- header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
16
- dumper.close_in
17
- dumper.close_out
18
- dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header, &block
19
- dumper
20
- end
12
+ # parser = TSV::Parser.new(sorted_input_stream, options.dup)
13
+ # dumper = TSV::Dumper.new parser
14
+ # header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
15
+ # dumper.close_in
16
+ # dumper.close_out
17
+ # dumper.stream = Open.collapse_stream parser.stream, parser.first_line, parser.sep, header, &block
18
+ # dumper
19
+ #end
21
20
 
22
- def self.paste_streams(streams, options = {})
23
- options = Misc.add_defaults options, :sep => "\t", :sort => true
24
- sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix = Misc.process_options options, :sort, :sep, :preamble, :header, :same_fields, :fix_flat, :all_match, :field_prefix
25
-
26
- out = Misc.open_pipe do |sin|
27
-
28
- streams = streams.collect do |stream|
29
- case stream
30
- when (defined? Step and Step)
31
- stream.grace
32
- stream.get_stream || Open.open(stream.join.path)
33
- when Path
34
- stream.open
35
- when TSV::Dumper
36
- stream.stream
37
- else
38
- stream
39
- end
40
- end.compact
41
-
42
- num_streams = streams.length
43
-
44
- streams = streams.collect do |stream|
45
- sorted = Misc.sort_stream(stream)
46
- stream.annotate sorted if stream.respond_to? :annotate
47
- sorted
48
- end if sort
49
-
50
- lines = []
51
- fields = []
52
- sizes = []
53
- key_fields = []
54
- input_options = []
55
- empty = []
56
- preambles = []
57
-
58
- streams = streams.collect do |stream|
59
-
60
- parser = TSV::Parser.new stream, options.dup
61
- sfields = parser.fields
62
-
63
- if field_prefix
64
- index = streams.index stream
65
- prefix = field_prefix[index]
66
-
67
- sfields = sfields.collect{|f| [prefix, f] * ":" }
68
- end
69
-
70
- first_line = parser.first_line
71
- first_line = nil if first_line == ""
72
-
73
- lines << first_line
74
- key_fields << parser.key_field
75
- fields << sfields
76
- sizes << sfields.length if sfields
77
- input_options << parser.options
78
- preambles << parser.preamble if preamble and not parser.preamble.empty?
79
-
80
- stream = if fix_flat and parser.type == :flat and first_line
81
- parts = lines[-1].nil? ? [] : lines[-1].split("\t")
82
- lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
83
- TSV.stream_flat2double(parser.stream, :noheader => true).stream
84
- else
85
- parser.stream
86
- end
87
-
88
- empty << stream if parser.first_line.nil? || parser.first_line.empty?
89
-
90
- stream
91
- end
92
-
93
- all_fields = fields
94
- key_field = key_fields.compact.first
95
- if same_fields
96
- fields = fields.first
97
- else
98
- fields = fields.compact.flatten
99
- end
100
- options = options.merge(input_options.first || {})
101
- options[:type] = :list if options[:type] == :single
102
- options[:type] = :double if fix_flat
103
-
104
- preamble_txt = case preamble
105
- when TrueClass
106
- preambles * "\n"
107
- when String
108
- if preamble[0] == '+'
109
- preambles * "\n" + "\n" + preamble[1..-1]
110
- else
111
- preamble
112
- end
113
- else
114
- nil
115
- end
116
-
117
- header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
118
- sin.puts header
119
-
120
- empty_pos = empty.collect{|stream| streams.index stream }
121
- empty_pos.sort.reverse.each do |i|
122
- key_fields.delete_at i
123
- input_options.delete_at i
124
- end
125
-
126
- begin
127
- done_streams = []
128
-
129
- keys = []
130
- parts = []
131
- lines.each_with_index do |line,i|
132
- if line.nil? || line.empty?
133
- keys[i] = nil
134
- parts[i] = nil
135
- else
136
- vs = line.chomp.split(sep, -1)
137
- key, *p = vs
138
- keys[i] = key
139
- parts[i] = p
140
- end
141
- sizes[i] ||= parts[i].length-1 unless parts[i].nil?
142
- end
143
-
144
- last_min = nil
145
- while lines.compact.any?
146
- min = keys.compact.sort.first
147
- break if min.nil?
148
- str = []
149
-
150
- skip = all_match && keys.uniq != [min]
151
-
152
- keys.each_with_index do |key,i|
153
- case key
154
- when min
155
- str << parts[i] * sep
156
-
157
- begin
158
- line = lines[i] = begin
159
- streams[i].gets
160
- rescue
161
- Log.exception $!
162
- nil
163
- end
164
- if line.nil?
165
- stream = streams[i]
166
- keys[i] = nil
167
- parts[i] = nil
168
- else
169
- k, *p = line.chomp.split(sep, -1)
170
- raise TryAgain if k == keys[i]
171
- keys[i] = k
172
- parts[i] = p.collect{|e| e.nil? ? "" : e }
173
- end
174
- rescue TryAgain
175
- Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
176
- retry
177
- end
178
- else
179
- if sizes[i] and sizes[i] > 0
180
- p = sep * (sizes[i]-1)
181
- str << p
182
- end
183
- end
184
- end
185
-
186
- next if skip
187
-
188
- if same_fields
189
-
190
- values = nil
191
- str.each do |part|
192
- next if part.nil? or part.empty?
193
- _p = part.split(sep,-1)
194
- if values.nil?
195
- values = _p.collect{|v| [v]}
196
- else
197
- _p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
198
- end
199
- end
200
-
201
- values = [[]] * str.length if values.nil?
202
- values = values.collect{|list| list * "|" } * sep
203
-
204
- else
205
- values = str.inject(nil) do |acc,part|
206
- if acc.nil?
207
- acc = part.dup
208
- else
209
- acc << sep << part
210
- end
211
- acc
212
- end
213
- end
214
- text = [min, values] * sep
215
- sin.puts text
216
- end
217
-
218
- streams.each do |stream|
219
- stream.join if stream.respond_to? :join
220
- end
221
- rescue Aborted
222
- Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
223
- streams.each do |stream|
224
- stream.abort if stream.respond_to? :abort
225
- end
226
- raise $!
227
- rescue Exception
228
- Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
229
- streams.each do |stream|
230
- stream.abort if stream.respond_to? :abort
231
- end
232
- raise $!
233
- end
234
- end
235
-
236
- out
237
- end
238
-
239
- def self.stream_flat2double(stream, options = {})
240
- noheader = Misc.process_options options, :noheader
241
- parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
242
- dumper_options = parser.options.merge(options).merge(:type => :double)
243
- dumper = TSV::Dumper.new dumper_options
244
- dumper.init unless noheader
245
- TSV.traverse parser, :into => dumper do |key,values|
246
- key = key.first if Array === key
247
- values = [values] unless Array === values
248
- [key, [values.flatten]]
249
- end
250
- dumper
251
- end
21
+ # def self.paste_streams(streams, options = {})
22
+ # options = IndiferentHash.add_defaults options, :sep => "\t", :sort => true
23
+ # sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix = IndiferentHash.process_options options, :sort, :sep, :preamble, :header, :same_fields, :fix_flat, :all_match, :field_prefix
24
+ #
25
+ # out = Open.open_pipe do |sin|
26
+ #
27
+ # streams = streams.collect do |stream|
28
+ # case stream
29
+ # when (defined? Step and Step)
30
+ # stream.grace
31
+ # stream.stream || Open.open(stream.join.path)
32
+ # when Path
33
+ # stream.open
34
+ # when TSV::Dumper
35
+ # stream.stream
36
+ # else
37
+ # stream
38
+ # end
39
+ # end.compact
40
+ #
41
+ # num_streams = streams.length
42
+ #
43
+ # streams = streams.collect do |stream|
44
+ # sorted = Open.sort_stream(stream)
45
+ # stream.annotate sorted if stream.respond_to? :annotate
46
+ # sorted
47
+ # end if sort
48
+ #
49
+ # lines = []
50
+ # fields = []
51
+ # sizes = []
52
+ # key_fields = []
53
+ # input_options = []
54
+ # empty = []
55
+ # preambles = []
56
+ #
57
+ # streams = streams.collect do |stream|
58
+ #
59
+ # parser = TSV::Parser.new stream, **options.dup
60
+ # sfields = parser.fields
61
+ #
62
+ # if field_prefix
63
+ # index = streams.index stream
64
+ # prefix = field_prefix[index]
65
+ #
66
+ # sfields = sfields.collect{|f| [prefix, f] * ":" }
67
+ # end
68
+ #
69
+ # first_line = parser.first_line
70
+ # first_line = nil if first_line == ""
71
+ #
72
+ # lines << first_line
73
+ # key_fields << parser.key_field
74
+ # fields << sfields
75
+ # sizes << sfields.length if sfields
76
+ # input_options << parser.options
77
+ # preambles << parser.preamble if preamble and not parser.preamble.empty?
78
+ #
79
+ # stream = if fix_flat and parser.type == :flat and first_line
80
+ # parts = lines[-1].nil? ? [] : lines[-1].split("\t")
81
+ # lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
82
+ # TSV.stream_flat2double(parser.stream, :noheader => true).stream
83
+ # else
84
+ # parser.stream
85
+ # end
86
+ #
87
+ # empty << stream if parser.first_line.nil? || parser.first_line.empty?
88
+ #
89
+ # stream
90
+ # end
91
+ #
92
+ # all_fields = fields
93
+ # key_field = key_fields.compact.first
94
+ # if same_fields
95
+ # fields = fields.first
96
+ # else
97
+ # fields = fields.compact.flatten
98
+ # end
99
+ # options = options.merge(input_options.first || {})
100
+ # options[:type] = :list if options[:type] == :single
101
+ # options[:type] = :double if fix_flat
102
+ #
103
+ # preamble_txt = case preamble
104
+ # when TrueClass
105
+ # preambles * "\n"
106
+ # when String
107
+ # if preamble[0] == '+'
108
+ # preambles * "\n" + "\n" + preamble[1..-1]
109
+ # else
110
+ # preamble
111
+ # end
112
+ # else
113
+ # nil
114
+ # end
115
+ #
116
+ # header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
117
+ # sin.puts header
118
+ #
119
+ # empty_pos = empty.collect{|stream| streams.index stream }
120
+ # empty_pos.sort.reverse.each do |i|
121
+ # key_fields.delete_at i
122
+ # input_options.delete_at i
123
+ # end
124
+ #
125
+ # begin
126
+ # done_streams = []
127
+ #
128
+ # keys = []
129
+ # parts = []
130
+ # lines.each_with_index do |line,i|
131
+ # if line.nil? || line.empty?
132
+ # keys[i] = nil
133
+ # parts[i] = nil
134
+ # else
135
+ # vs = line.chomp.split(sep, -1)
136
+ # key, *p = vs
137
+ # keys[i] = key
138
+ # parts[i] = p
139
+ # end
140
+ # sizes[i] ||= parts[i].length-1 unless parts[i].nil?
141
+ # end
142
+ #
143
+ # last_min = nil
144
+ # while lines.compact.any?
145
+ # min = keys.compact.sort.first
146
+ # break if min.nil?
147
+ # str = []
148
+ #
149
+ # skip = all_match && keys.uniq != [min]
150
+ #
151
+ # keys.each_with_index do |key,i|
152
+ # case key
153
+ # when min
154
+ # str << parts[i] * sep
155
+ #
156
+ # begin
157
+ # line = lines[i] = begin
158
+ # streams[i].gets
159
+ # rescue
160
+ # Log.exception $!
161
+ # nil
162
+ # end
163
+ # if line.nil?
164
+ # stream = streams[i]
165
+ # keys[i] = nil
166
+ # parts[i] = nil
167
+ # else
168
+ # k, *p = line.chomp.split(sep, -1)
169
+ # raise TryAgain if k == keys[i]
170
+ # keys[i] = k
171
+ # parts[i] = p.collect{|e| e.nil? ? "" : e }
172
+ # end
173
+ # rescue TryAgain
174
+ # Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
175
+ # retry
176
+ # end
177
+ # else
178
+ # if sizes[i] and sizes[i] > 0
179
+ # p = sep * (sizes[i]-1)
180
+ # str << p
181
+ # end
182
+ # end
183
+ # end
184
+ #
185
+ # next if skip
186
+ #
187
+ # if same_fields
188
+ #
189
+ # values = nil
190
+ # str.each do |part|
191
+ # next if part.nil? or part.empty?
192
+ # _p = part.split(sep,-1)
193
+ # if values.nil?
194
+ # values = _p.collect{|v| [v]}
195
+ # else
196
+ # _p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
197
+ # end
198
+ # end
199
+ #
200
+ # values = [[]] * str.length if values.nil?
201
+ # values = values.collect{|list| list * "|" } * sep
202
+ #
203
+ # else
204
+ # values = str.inject(nil) do |acc,part|
205
+ # if acc.nil?
206
+ # acc = part.dup
207
+ # else
208
+ # acc << sep << part
209
+ # end
210
+ # acc
211
+ # end
212
+ # end
213
+ # text = [min, values] * sep
214
+ # sin.puts text
215
+ # end
216
+ #
217
+ # streams.each do |stream|
218
+ # stream.join if stream.respond_to? :join
219
+ # end
220
+ # rescue Aborted
221
+ # Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
222
+ # streams.each do |stream|
223
+ # stream.abort if stream.respond_to? :abort
224
+ # end
225
+ # raise $!
226
+ # rescue Exception
227
+ # Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
228
+ # streams.each do |stream|
229
+ # stream.abort if stream.respond_to? :abort
230
+ # end
231
+ # raise $!
232
+ # end
233
+ # end
234
+ #
235
+ # out
236
+ # end
237
+
238
+ # def self.stream_flat2double(stream, options = {})
239
+ # noheader = IndiferentHash.process_options options, :noheader
240
+ # parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
241
+ # dumper_options = parser.options.merge(options).merge(:type => :double)
242
+ # dumper = TSV::Dumper.new dumper_options
243
+ # dumper.init unless noheader
244
+ # TSV.traverse parser, :into => dumper do |key,values|
245
+ # key = key.first if Array === key
246
+ # values = [values] unless Array === values
247
+ # [key, [values.flatten]]
248
+ # end
249
+ # dumper
250
+ # end
252
251
 
253
252
 
254
253
  def self.reorder_stream(stream, positions, sep = "\t")
255
- Misc.open_pipe do |sin|
254
+ Open.open_pipe do |sin|
256
255
  line = stream.gets
257
256
  line.chomp! unless line.nil?
258
257
 
@@ -296,19 +295,22 @@ module TSV
296
295
 
297
296
 
298
297
  def self.reorder_stream_tsv(stream, key_field, fields=nil, zipped = true, bar = nil)
299
- parser = TSV::Parser.new TSV.get_stream(stream), :key_field => key_field, :fields => fields
298
+ parser = TSV::Parser.new TSV.get_stream(stream)
300
299
  dumper_options = parser.options
300
+ dumper_options[:key_field] = key_field
301
+ dumper_options[:fields] = fields if fields
301
302
  dumper = TSV::Dumper.new dumper_options
302
303
  dumper.init
303
304
  case parser.type
304
305
  when :single
305
- TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
306
+ TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
306
307
  key = keys.first
307
308
  [key, [values]]
308
309
  end
309
310
  when :double
310
- TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
311
+ TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
311
312
  res = []
313
+ keys = [keys] unless Array === keys
312
314
  keys.each_with_index do |key,i|
313
315
  vs = zipped ? values.collect{|l| l.length == 1 ? l : [l[i]] } : values
314
316
  res << [key, vs]
@@ -317,12 +319,12 @@ module TSV
317
319
  res
318
320
  end
319
321
  when :list
320
- TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
322
+ TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
321
323
  key = keys === Array ? keys.first : keys
322
324
  [key, values]
323
325
  end
324
326
  when :flat
325
- TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
327
+ TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
326
328
  key = keys === Array ? keys.first : keys
327
329
  [key, values]
328
330
  end