rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -1,258 +1,257 @@
|
|
1
|
-
require 'rbbt/tsv/dumper'
|
2
1
|
module TSV
|
3
2
|
|
4
|
-
def self.collapse_stream(input, options = {}, &block)
|
5
|
-
|
6
|
-
|
3
|
+
#def self.collapse_stream(input, options = {}, &block)
|
4
|
+
# options = IndiferentHash.add_defaults options, :sep => "\t", :header_hash => '#', :uniq => true
|
5
|
+
# input_stream = TSV.get_stream input
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
# header_hash = options[:header_hash]
|
8
|
+
# cmd_args = options[:uniq] ? "-u" : nil
|
10
9
|
|
11
|
-
|
10
|
+
# sorted_input_stream = Open.sort_stream input_stream, header_hash, cmd_args
|
12
11
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
12
|
+
# parser = TSV::Parser.new(sorted_input_stream, options.dup)
|
13
|
+
# dumper = TSV::Dumper.new parser
|
14
|
+
# header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
|
15
|
+
# dumper.close_in
|
16
|
+
# dumper.close_out
|
17
|
+
# dumper.stream = Open.collapse_stream parser.stream, parser.first_line, parser.sep, header, &block
|
18
|
+
# dumper
|
19
|
+
#end
|
21
20
|
|
22
|
-
def self.paste_streams(streams, options = {})
|
23
|
-
options =
|
24
|
-
sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix =
|
25
|
-
|
26
|
-
out =
|
27
|
-
|
28
|
-
streams = streams.collect do |stream|
|
29
|
-
case stream
|
30
|
-
when (defined? Step and Step)
|
31
|
-
stream.grace
|
32
|
-
stream.
|
33
|
-
when Path
|
34
|
-
stream.open
|
35
|
-
when TSV::Dumper
|
36
|
-
stream.stream
|
37
|
-
else
|
38
|
-
stream
|
39
|
-
end
|
40
|
-
end.compact
|
41
|
-
|
42
|
-
num_streams = streams.length
|
43
|
-
|
44
|
-
streams = streams.collect do |stream|
|
45
|
-
sorted =
|
46
|
-
stream.annotate sorted if stream.respond_to? :annotate
|
47
|
-
sorted
|
48
|
-
end if sort
|
49
|
-
|
50
|
-
lines = []
|
51
|
-
fields = []
|
52
|
-
sizes = []
|
53
|
-
key_fields = []
|
54
|
-
input_options = []
|
55
|
-
empty = []
|
56
|
-
preambles = []
|
57
|
-
|
58
|
-
streams = streams.collect do |stream|
|
59
|
-
|
60
|
-
parser = TSV::Parser.new stream, options.dup
|
61
|
-
sfields = parser.fields
|
62
|
-
|
63
|
-
if field_prefix
|
64
|
-
index = streams.index stream
|
65
|
-
prefix = field_prefix[index]
|
66
|
-
|
67
|
-
sfields = sfields.collect{|f| [prefix, f] * ":" }
|
68
|
-
end
|
69
|
-
|
70
|
-
first_line = parser.first_line
|
71
|
-
first_line = nil if first_line == ""
|
72
|
-
|
73
|
-
lines << first_line
|
74
|
-
key_fields << parser.key_field
|
75
|
-
fields << sfields
|
76
|
-
sizes << sfields.length if sfields
|
77
|
-
input_options << parser.options
|
78
|
-
preambles << parser.preamble if preamble and not parser.preamble.empty?
|
79
|
-
|
80
|
-
stream = if fix_flat and parser.type == :flat and first_line
|
81
|
-
parts = lines[-1].nil? ? [] : lines[-1].split("\t")
|
82
|
-
lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
|
83
|
-
TSV.stream_flat2double(parser.stream, :noheader => true).stream
|
84
|
-
else
|
85
|
-
parser.stream
|
86
|
-
end
|
87
|
-
|
88
|
-
empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
89
|
-
|
90
|
-
stream
|
91
|
-
end
|
92
|
-
|
93
|
-
all_fields = fields
|
94
|
-
key_field = key_fields.compact.first
|
95
|
-
if same_fields
|
96
|
-
fields = fields.first
|
97
|
-
else
|
98
|
-
fields = fields.compact.flatten
|
99
|
-
end
|
100
|
-
options = options.merge(input_options.first || {})
|
101
|
-
options[:type] = :list if options[:type] == :single
|
102
|
-
options[:type] = :double if fix_flat
|
103
|
-
|
104
|
-
preamble_txt = case preamble
|
105
|
-
when TrueClass
|
106
|
-
preambles * "\n"
|
107
|
-
when String
|
108
|
-
if preamble[0] == '+'
|
109
|
-
preambles * "\n" + "\n" + preamble[1..-1]
|
110
|
-
else
|
111
|
-
preamble
|
112
|
-
end
|
113
|
-
else
|
114
|
-
nil
|
115
|
-
end
|
116
|
-
|
117
|
-
header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
118
|
-
sin.puts header
|
119
|
-
|
120
|
-
empty_pos = empty.collect{|stream| streams.index stream }
|
121
|
-
empty_pos.sort.reverse.each do |i|
|
122
|
-
key_fields.delete_at i
|
123
|
-
input_options.delete_at i
|
124
|
-
end
|
125
|
-
|
126
|
-
begin
|
127
|
-
done_streams = []
|
128
|
-
|
129
|
-
keys = []
|
130
|
-
parts = []
|
131
|
-
lines.each_with_index do |line,i|
|
132
|
-
if line.nil? || line.empty?
|
133
|
-
keys[i] = nil
|
134
|
-
parts[i] = nil
|
135
|
-
else
|
136
|
-
vs = line.chomp.split(sep, -1)
|
137
|
-
key, *p = vs
|
138
|
-
keys[i] = key
|
139
|
-
parts[i] = p
|
140
|
-
end
|
141
|
-
sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
142
|
-
end
|
143
|
-
|
144
|
-
last_min = nil
|
145
|
-
while lines.compact.any?
|
146
|
-
min = keys.compact.sort.first
|
147
|
-
break if min.nil?
|
148
|
-
str = []
|
149
|
-
|
150
|
-
skip = all_match && keys.uniq != [min]
|
151
|
-
|
152
|
-
keys.each_with_index do |key,i|
|
153
|
-
case key
|
154
|
-
when min
|
155
|
-
str << parts[i] * sep
|
156
|
-
|
157
|
-
begin
|
158
|
-
line = lines[i] = begin
|
159
|
-
streams[i].gets
|
160
|
-
rescue
|
161
|
-
Log.exception $!
|
162
|
-
nil
|
163
|
-
end
|
164
|
-
if line.nil?
|
165
|
-
stream = streams[i]
|
166
|
-
keys[i] = nil
|
167
|
-
parts[i] = nil
|
168
|
-
else
|
169
|
-
k, *p = line.chomp.split(sep, -1)
|
170
|
-
raise TryAgain if k == keys[i]
|
171
|
-
keys[i] = k
|
172
|
-
parts[i] = p.collect{|e| e.nil? ? "" : e }
|
173
|
-
end
|
174
|
-
rescue TryAgain
|
175
|
-
Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
176
|
-
retry
|
177
|
-
end
|
178
|
-
else
|
179
|
-
if sizes[i] and sizes[i] > 0
|
180
|
-
p = sep * (sizes[i]-1)
|
181
|
-
str << p
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
next if skip
|
187
|
-
|
188
|
-
if same_fields
|
189
|
-
|
190
|
-
values = nil
|
191
|
-
str.each do |part|
|
192
|
-
next if part.nil? or part.empty?
|
193
|
-
_p = part.split(sep,-1)
|
194
|
-
if values.nil?
|
195
|
-
values = _p.collect{|v| [v]}
|
196
|
-
else
|
197
|
-
_p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
values = [[]] * str.length if values.nil?
|
202
|
-
values = values.collect{|list| list * "|" } * sep
|
203
|
-
|
204
|
-
else
|
205
|
-
values = str.inject(nil) do |acc,part|
|
206
|
-
if acc.nil?
|
207
|
-
acc = part.dup
|
208
|
-
else
|
209
|
-
acc << sep << part
|
210
|
-
end
|
211
|
-
acc
|
212
|
-
end
|
213
|
-
end
|
214
|
-
text = [min, values] * sep
|
215
|
-
sin.puts text
|
216
|
-
end
|
217
|
-
|
218
|
-
streams.each do |stream|
|
219
|
-
stream.join if stream.respond_to? :join
|
220
|
-
end
|
221
|
-
rescue Aborted
|
222
|
-
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
223
|
-
streams.each do |stream|
|
224
|
-
stream.abort if stream.respond_to? :abort
|
225
|
-
end
|
226
|
-
raise $!
|
227
|
-
rescue Exception
|
228
|
-
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
229
|
-
streams.each do |stream|
|
230
|
-
stream.abort if stream.respond_to? :abort
|
231
|
-
end
|
232
|
-
raise $!
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
out
|
237
|
-
end
|
238
|
-
|
239
|
-
def self.stream_flat2double(stream, options = {})
|
240
|
-
noheader =
|
241
|
-
parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
|
242
|
-
dumper_options = parser.options.merge(options).merge(:type => :double)
|
243
|
-
dumper = TSV::Dumper.new dumper_options
|
244
|
-
dumper.init unless noheader
|
245
|
-
TSV.traverse parser, :into => dumper do |key,values|
|
246
|
-
key = key.first if Array === key
|
247
|
-
values = [values] unless Array === values
|
248
|
-
[key, [values.flatten]]
|
249
|
-
end
|
250
|
-
dumper
|
251
|
-
end
|
21
|
+
# def self.paste_streams(streams, options = {})
|
22
|
+
# options = IndiferentHash.add_defaults options, :sep => "\t", :sort => true
|
23
|
+
# sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix = IndiferentHash.process_options options, :sort, :sep, :preamble, :header, :same_fields, :fix_flat, :all_match, :field_prefix
|
24
|
+
#
|
25
|
+
# out = Open.open_pipe do |sin|
|
26
|
+
#
|
27
|
+
# streams = streams.collect do |stream|
|
28
|
+
# case stream
|
29
|
+
# when (defined? Step and Step)
|
30
|
+
# stream.grace
|
31
|
+
# stream.stream || Open.open(stream.join.path)
|
32
|
+
# when Path
|
33
|
+
# stream.open
|
34
|
+
# when TSV::Dumper
|
35
|
+
# stream.stream
|
36
|
+
# else
|
37
|
+
# stream
|
38
|
+
# end
|
39
|
+
# end.compact
|
40
|
+
#
|
41
|
+
# num_streams = streams.length
|
42
|
+
#
|
43
|
+
# streams = streams.collect do |stream|
|
44
|
+
# sorted = Open.sort_stream(stream)
|
45
|
+
# stream.annotate sorted if stream.respond_to? :annotate
|
46
|
+
# sorted
|
47
|
+
# end if sort
|
48
|
+
#
|
49
|
+
# lines = []
|
50
|
+
# fields = []
|
51
|
+
# sizes = []
|
52
|
+
# key_fields = []
|
53
|
+
# input_options = []
|
54
|
+
# empty = []
|
55
|
+
# preambles = []
|
56
|
+
#
|
57
|
+
# streams = streams.collect do |stream|
|
58
|
+
#
|
59
|
+
# parser = TSV::Parser.new stream, **options.dup
|
60
|
+
# sfields = parser.fields
|
61
|
+
#
|
62
|
+
# if field_prefix
|
63
|
+
# index = streams.index stream
|
64
|
+
# prefix = field_prefix[index]
|
65
|
+
#
|
66
|
+
# sfields = sfields.collect{|f| [prefix, f] * ":" }
|
67
|
+
# end
|
68
|
+
#
|
69
|
+
# first_line = parser.first_line
|
70
|
+
# first_line = nil if first_line == ""
|
71
|
+
#
|
72
|
+
# lines << first_line
|
73
|
+
# key_fields << parser.key_field
|
74
|
+
# fields << sfields
|
75
|
+
# sizes << sfields.length if sfields
|
76
|
+
# input_options << parser.options
|
77
|
+
# preambles << parser.preamble if preamble and not parser.preamble.empty?
|
78
|
+
#
|
79
|
+
# stream = if fix_flat and parser.type == :flat and first_line
|
80
|
+
# parts = lines[-1].nil? ? [] : lines[-1].split("\t")
|
81
|
+
# lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
|
82
|
+
# TSV.stream_flat2double(parser.stream, :noheader => true).stream
|
83
|
+
# else
|
84
|
+
# parser.stream
|
85
|
+
# end
|
86
|
+
#
|
87
|
+
# empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
88
|
+
#
|
89
|
+
# stream
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# all_fields = fields
|
93
|
+
# key_field = key_fields.compact.first
|
94
|
+
# if same_fields
|
95
|
+
# fields = fields.first
|
96
|
+
# else
|
97
|
+
# fields = fields.compact.flatten
|
98
|
+
# end
|
99
|
+
# options = options.merge(input_options.first || {})
|
100
|
+
# options[:type] = :list if options[:type] == :single
|
101
|
+
# options[:type] = :double if fix_flat
|
102
|
+
#
|
103
|
+
# preamble_txt = case preamble
|
104
|
+
# when TrueClass
|
105
|
+
# preambles * "\n"
|
106
|
+
# when String
|
107
|
+
# if preamble[0] == '+'
|
108
|
+
# preambles * "\n" + "\n" + preamble[1..-1]
|
109
|
+
# else
|
110
|
+
# preamble
|
111
|
+
# end
|
112
|
+
# else
|
113
|
+
# nil
|
114
|
+
# end
|
115
|
+
#
|
116
|
+
# header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
117
|
+
# sin.puts header
|
118
|
+
#
|
119
|
+
# empty_pos = empty.collect{|stream| streams.index stream }
|
120
|
+
# empty_pos.sort.reverse.each do |i|
|
121
|
+
# key_fields.delete_at i
|
122
|
+
# input_options.delete_at i
|
123
|
+
# end
|
124
|
+
#
|
125
|
+
# begin
|
126
|
+
# done_streams = []
|
127
|
+
#
|
128
|
+
# keys = []
|
129
|
+
# parts = []
|
130
|
+
# lines.each_with_index do |line,i|
|
131
|
+
# if line.nil? || line.empty?
|
132
|
+
# keys[i] = nil
|
133
|
+
# parts[i] = nil
|
134
|
+
# else
|
135
|
+
# vs = line.chomp.split(sep, -1)
|
136
|
+
# key, *p = vs
|
137
|
+
# keys[i] = key
|
138
|
+
# parts[i] = p
|
139
|
+
# end
|
140
|
+
# sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
141
|
+
# end
|
142
|
+
#
|
143
|
+
# last_min = nil
|
144
|
+
# while lines.compact.any?
|
145
|
+
# min = keys.compact.sort.first
|
146
|
+
# break if min.nil?
|
147
|
+
# str = []
|
148
|
+
#
|
149
|
+
# skip = all_match && keys.uniq != [min]
|
150
|
+
#
|
151
|
+
# keys.each_with_index do |key,i|
|
152
|
+
# case key
|
153
|
+
# when min
|
154
|
+
# str << parts[i] * sep
|
155
|
+
#
|
156
|
+
# begin
|
157
|
+
# line = lines[i] = begin
|
158
|
+
# streams[i].gets
|
159
|
+
# rescue
|
160
|
+
# Log.exception $!
|
161
|
+
# nil
|
162
|
+
# end
|
163
|
+
# if line.nil?
|
164
|
+
# stream = streams[i]
|
165
|
+
# keys[i] = nil
|
166
|
+
# parts[i] = nil
|
167
|
+
# else
|
168
|
+
# k, *p = line.chomp.split(sep, -1)
|
169
|
+
# raise TryAgain if k == keys[i]
|
170
|
+
# keys[i] = k
|
171
|
+
# parts[i] = p.collect{|e| e.nil? ? "" : e }
|
172
|
+
# end
|
173
|
+
# rescue TryAgain
|
174
|
+
# Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
175
|
+
# retry
|
176
|
+
# end
|
177
|
+
# else
|
178
|
+
# if sizes[i] and sizes[i] > 0
|
179
|
+
# p = sep * (sizes[i]-1)
|
180
|
+
# str << p
|
181
|
+
# end
|
182
|
+
# end
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# next if skip
|
186
|
+
#
|
187
|
+
# if same_fields
|
188
|
+
#
|
189
|
+
# values = nil
|
190
|
+
# str.each do |part|
|
191
|
+
# next if part.nil? or part.empty?
|
192
|
+
# _p = part.split(sep,-1)
|
193
|
+
# if values.nil?
|
194
|
+
# values = _p.collect{|v| [v]}
|
195
|
+
# else
|
196
|
+
# _p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
|
197
|
+
# end
|
198
|
+
# end
|
199
|
+
#
|
200
|
+
# values = [[]] * str.length if values.nil?
|
201
|
+
# values = values.collect{|list| list * "|" } * sep
|
202
|
+
#
|
203
|
+
# else
|
204
|
+
# values = str.inject(nil) do |acc,part|
|
205
|
+
# if acc.nil?
|
206
|
+
# acc = part.dup
|
207
|
+
# else
|
208
|
+
# acc << sep << part
|
209
|
+
# end
|
210
|
+
# acc
|
211
|
+
# end
|
212
|
+
# end
|
213
|
+
# text = [min, values] * sep
|
214
|
+
# sin.puts text
|
215
|
+
# end
|
216
|
+
#
|
217
|
+
# streams.each do |stream|
|
218
|
+
# stream.join if stream.respond_to? :join
|
219
|
+
# end
|
220
|
+
# rescue Aborted
|
221
|
+
# Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
222
|
+
# streams.each do |stream|
|
223
|
+
# stream.abort if stream.respond_to? :abort
|
224
|
+
# end
|
225
|
+
# raise $!
|
226
|
+
# rescue Exception
|
227
|
+
# Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
228
|
+
# streams.each do |stream|
|
229
|
+
# stream.abort if stream.respond_to? :abort
|
230
|
+
# end
|
231
|
+
# raise $!
|
232
|
+
# end
|
233
|
+
# end
|
234
|
+
#
|
235
|
+
# out
|
236
|
+
# end
|
237
|
+
|
238
|
+
# def self.stream_flat2double(stream, options = {})
|
239
|
+
# noheader = IndiferentHash.process_options options, :noheader
|
240
|
+
# parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
|
241
|
+
# dumper_options = parser.options.merge(options).merge(:type => :double)
|
242
|
+
# dumper = TSV::Dumper.new dumper_options
|
243
|
+
# dumper.init unless noheader
|
244
|
+
# TSV.traverse parser, :into => dumper do |key,values|
|
245
|
+
# key = key.first if Array === key
|
246
|
+
# values = [values] unless Array === values
|
247
|
+
# [key, [values.flatten]]
|
248
|
+
# end
|
249
|
+
# dumper
|
250
|
+
# end
|
252
251
|
|
253
252
|
|
254
253
|
def self.reorder_stream(stream, positions, sep = "\t")
|
255
|
-
|
254
|
+
Open.open_pipe do |sin|
|
256
255
|
line = stream.gets
|
257
256
|
line.chomp! unless line.nil?
|
258
257
|
|
@@ -296,19 +295,22 @@ module TSV
|
|
296
295
|
|
297
296
|
|
298
297
|
def self.reorder_stream_tsv(stream, key_field, fields=nil, zipped = true, bar = nil)
|
299
|
-
parser = TSV::Parser.new TSV.get_stream(stream)
|
298
|
+
parser = TSV::Parser.new TSV.get_stream(stream)
|
300
299
|
dumper_options = parser.options
|
300
|
+
dumper_options[:key_field] = key_field
|
301
|
+
dumper_options[:fields] = fields if fields
|
301
302
|
dumper = TSV::Dumper.new dumper_options
|
302
303
|
dumper.init
|
303
304
|
case parser.type
|
304
305
|
when :single
|
305
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
306
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
306
307
|
key = keys.first
|
307
308
|
[key, [values]]
|
308
309
|
end
|
309
310
|
when :double
|
310
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
311
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
311
312
|
res = []
|
313
|
+
keys = [keys] unless Array === keys
|
312
314
|
keys.each_with_index do |key,i|
|
313
315
|
vs = zipped ? values.collect{|l| l.length == 1 ? l : [l[i]] } : values
|
314
316
|
res << [key, vs]
|
@@ -317,12 +319,12 @@ module TSV
|
|
317
319
|
res
|
318
320
|
end
|
319
321
|
when :list
|
320
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
322
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
321
323
|
key = keys === Array ? keys.first : keys
|
322
324
|
[key, values]
|
323
325
|
end
|
324
326
|
when :flat
|
325
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
327
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
326
328
|
key = keys === Array ? keys.first : keys
|
327
329
|
[key, values]
|
328
330
|
end
|