rbbt-util 5.44.1 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/bin/rbbt +67 -90
- data/bin/rbbt_exec.rb +2 -2
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/filecache.rb +1 -1
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +2 -2
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +92 -105
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
- data/lib/rbbt/workflow/refactor.rb +150 -0
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +96 -4
- data/python/rbbt/workflow/remote.py +104 -0
- data/python/rbbt/workflow.py +64 -0
- data/python/test.py +10 -0
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/retry +43 -0
- data/share/rbbt_commands/workflow/server +12 -2
- data/share/rbbt_commands/workflow/task +80 -73
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +45 -6
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -1,258 +1,257 @@
|
|
1
|
-
require 'rbbt/tsv/dumper'
|
2
1
|
module TSV
|
3
2
|
|
4
|
-
def self.collapse_stream(input, options = {}, &block)
|
5
|
-
|
6
|
-
|
3
|
+
#def self.collapse_stream(input, options = {}, &block)
|
4
|
+
# options = IndiferentHash.add_defaults options, :sep => "\t", :header_hash => '#', :uniq => true
|
5
|
+
# input_stream = TSV.get_stream input
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
# header_hash = options[:header_hash]
|
8
|
+
# cmd_args = options[:uniq] ? "-u" : nil
|
10
9
|
|
11
|
-
|
10
|
+
# sorted_input_stream = Open.sort_stream input_stream, header_hash, cmd_args
|
12
11
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
12
|
+
# parser = TSV::Parser.new(sorted_input_stream, options.dup)
|
13
|
+
# dumper = TSV::Dumper.new parser
|
14
|
+
# header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
|
15
|
+
# dumper.close_in
|
16
|
+
# dumper.close_out
|
17
|
+
# dumper.stream = Open.collapse_stream parser.stream, parser.first_line, parser.sep, header, &block
|
18
|
+
# dumper
|
19
|
+
#end
|
21
20
|
|
22
|
-
def self.paste_streams(streams, options = {})
|
23
|
-
options =
|
24
|
-
sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix =
|
25
|
-
|
26
|
-
out =
|
27
|
-
|
28
|
-
streams = streams.collect do |stream|
|
29
|
-
case stream
|
30
|
-
when (defined? Step and Step)
|
31
|
-
stream.grace
|
32
|
-
stream.
|
33
|
-
when Path
|
34
|
-
stream.open
|
35
|
-
when TSV::Dumper
|
36
|
-
stream.stream
|
37
|
-
else
|
38
|
-
stream
|
39
|
-
end
|
40
|
-
end.compact
|
41
|
-
|
42
|
-
num_streams = streams.length
|
43
|
-
|
44
|
-
streams = streams.collect do |stream|
|
45
|
-
sorted =
|
46
|
-
stream.annotate sorted if stream.respond_to? :annotate
|
47
|
-
sorted
|
48
|
-
end if sort
|
49
|
-
|
50
|
-
lines = []
|
51
|
-
fields = []
|
52
|
-
sizes = []
|
53
|
-
key_fields = []
|
54
|
-
input_options = []
|
55
|
-
empty = []
|
56
|
-
preambles = []
|
57
|
-
|
58
|
-
streams = streams.collect do |stream|
|
59
|
-
|
60
|
-
parser = TSV::Parser.new stream, options.dup
|
61
|
-
sfields = parser.fields
|
62
|
-
|
63
|
-
if field_prefix
|
64
|
-
index = streams.index stream
|
65
|
-
prefix = field_prefix[index]
|
66
|
-
|
67
|
-
sfields = sfields.collect{|f| [prefix, f] * ":" }
|
68
|
-
end
|
69
|
-
|
70
|
-
first_line = parser.first_line
|
71
|
-
first_line = nil if first_line == ""
|
72
|
-
|
73
|
-
lines << first_line
|
74
|
-
key_fields << parser.key_field
|
75
|
-
fields << sfields
|
76
|
-
sizes << sfields.length if sfields
|
77
|
-
input_options << parser.options
|
78
|
-
preambles << parser.preamble if preamble and not parser.preamble.empty?
|
79
|
-
|
80
|
-
stream = if fix_flat and parser.type == :flat and first_line
|
81
|
-
parts = lines[-1].nil? ? [] : lines[-1].split("\t")
|
82
|
-
lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
|
83
|
-
TSV.stream_flat2double(parser.stream, :noheader => true).stream
|
84
|
-
else
|
85
|
-
parser.stream
|
86
|
-
end
|
87
|
-
|
88
|
-
empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
89
|
-
|
90
|
-
stream
|
91
|
-
end
|
92
|
-
|
93
|
-
all_fields = fields
|
94
|
-
key_field = key_fields.compact.first
|
95
|
-
if same_fields
|
96
|
-
fields = fields.first
|
97
|
-
else
|
98
|
-
fields = fields.compact.flatten
|
99
|
-
end
|
100
|
-
options = options.merge(input_options.first || {})
|
101
|
-
options[:type] = :list if options[:type] == :single
|
102
|
-
options[:type] = :double if fix_flat
|
103
|
-
|
104
|
-
preamble_txt = case preamble
|
105
|
-
when TrueClass
|
106
|
-
preambles * "\n"
|
107
|
-
when String
|
108
|
-
if preamble[0] == '+'
|
109
|
-
preambles * "\n" + "\n" + preamble[1..-1]
|
110
|
-
else
|
111
|
-
preamble
|
112
|
-
end
|
113
|
-
else
|
114
|
-
nil
|
115
|
-
end
|
116
|
-
|
117
|
-
header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
118
|
-
sin.puts header
|
119
|
-
|
120
|
-
empty_pos = empty.collect{|stream| streams.index stream }
|
121
|
-
empty_pos.sort.reverse.each do |i|
|
122
|
-
key_fields.delete_at i
|
123
|
-
input_options.delete_at i
|
124
|
-
end
|
125
|
-
|
126
|
-
begin
|
127
|
-
done_streams = []
|
128
|
-
|
129
|
-
keys = []
|
130
|
-
parts = []
|
131
|
-
lines.each_with_index do |line,i|
|
132
|
-
if line.nil? || line.empty?
|
133
|
-
keys[i] = nil
|
134
|
-
parts[i] = nil
|
135
|
-
else
|
136
|
-
vs = line.chomp.split(sep, -1)
|
137
|
-
key, *p = vs
|
138
|
-
keys[i] = key
|
139
|
-
parts[i] = p
|
140
|
-
end
|
141
|
-
sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
142
|
-
end
|
143
|
-
|
144
|
-
last_min = nil
|
145
|
-
while lines.compact.any?
|
146
|
-
min = keys.compact.sort.first
|
147
|
-
break if min.nil?
|
148
|
-
str = []
|
149
|
-
|
150
|
-
skip = all_match && keys.uniq != [min]
|
151
|
-
|
152
|
-
keys.each_with_index do |key,i|
|
153
|
-
case key
|
154
|
-
when min
|
155
|
-
str << parts[i] * sep
|
156
|
-
|
157
|
-
begin
|
158
|
-
line = lines[i] = begin
|
159
|
-
streams[i].gets
|
160
|
-
rescue
|
161
|
-
Log.exception $!
|
162
|
-
nil
|
163
|
-
end
|
164
|
-
if line.nil?
|
165
|
-
stream = streams[i]
|
166
|
-
keys[i] = nil
|
167
|
-
parts[i] = nil
|
168
|
-
else
|
169
|
-
k, *p = line.chomp.split(sep, -1)
|
170
|
-
raise TryAgain if k == keys[i]
|
171
|
-
keys[i] = k
|
172
|
-
parts[i] = p.collect{|e| e.nil? ? "" : e }
|
173
|
-
end
|
174
|
-
rescue TryAgain
|
175
|
-
Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
176
|
-
retry
|
177
|
-
end
|
178
|
-
else
|
179
|
-
if sizes[i] and sizes[i] > 0
|
180
|
-
p = sep * (sizes[i]-1)
|
181
|
-
str << p
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
next if skip
|
187
|
-
|
188
|
-
if same_fields
|
189
|
-
|
190
|
-
values = nil
|
191
|
-
str.each do |part|
|
192
|
-
next if part.nil? or part.empty?
|
193
|
-
_p = part.split(sep,-1)
|
194
|
-
if values.nil?
|
195
|
-
values = _p.collect{|v| [v]}
|
196
|
-
else
|
197
|
-
_p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
values = [[]] * str.length if values.nil?
|
202
|
-
values = values.collect{|list| list * "|" } * sep
|
203
|
-
|
204
|
-
else
|
205
|
-
values = str.inject(nil) do |acc,part|
|
206
|
-
if acc.nil?
|
207
|
-
acc = part.dup
|
208
|
-
else
|
209
|
-
acc << sep << part
|
210
|
-
end
|
211
|
-
acc
|
212
|
-
end
|
213
|
-
end
|
214
|
-
text = [min, values] * sep
|
215
|
-
sin.puts text
|
216
|
-
end
|
217
|
-
|
218
|
-
streams.each do |stream|
|
219
|
-
stream.join if stream.respond_to? :join
|
220
|
-
end
|
221
|
-
rescue Aborted
|
222
|
-
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
223
|
-
streams.each do |stream|
|
224
|
-
stream.abort if stream.respond_to? :abort
|
225
|
-
end
|
226
|
-
raise $!
|
227
|
-
rescue Exception
|
228
|
-
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
229
|
-
streams.each do |stream|
|
230
|
-
stream.abort if stream.respond_to? :abort
|
231
|
-
end
|
232
|
-
raise $!
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
out
|
237
|
-
end
|
238
|
-
|
239
|
-
def self.stream_flat2double(stream, options = {})
|
240
|
-
noheader =
|
241
|
-
parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
|
242
|
-
dumper_options = parser.options.merge(options).merge(:type => :double)
|
243
|
-
dumper = TSV::Dumper.new dumper_options
|
244
|
-
dumper.init unless noheader
|
245
|
-
TSV.traverse parser, :into => dumper do |key,values|
|
246
|
-
key = key.first if Array === key
|
247
|
-
values = [values] unless Array === values
|
248
|
-
[key, [values.flatten]]
|
249
|
-
end
|
250
|
-
dumper
|
251
|
-
end
|
21
|
+
# def self.paste_streams(streams, options = {})
|
22
|
+
# options = IndiferentHash.add_defaults options, :sep => "\t", :sort => true
|
23
|
+
# sort, sep, preamble, header, same_fields, fix_flat, all_match, field_prefix = IndiferentHash.process_options options, :sort, :sep, :preamble, :header, :same_fields, :fix_flat, :all_match, :field_prefix
|
24
|
+
#
|
25
|
+
# out = Open.open_pipe do |sin|
|
26
|
+
#
|
27
|
+
# streams = streams.collect do |stream|
|
28
|
+
# case stream
|
29
|
+
# when (defined? Step and Step)
|
30
|
+
# stream.grace
|
31
|
+
# stream.stream || Open.open(stream.join.path)
|
32
|
+
# when Path
|
33
|
+
# stream.open
|
34
|
+
# when TSV::Dumper
|
35
|
+
# stream.stream
|
36
|
+
# else
|
37
|
+
# stream
|
38
|
+
# end
|
39
|
+
# end.compact
|
40
|
+
#
|
41
|
+
# num_streams = streams.length
|
42
|
+
#
|
43
|
+
# streams = streams.collect do |stream|
|
44
|
+
# sorted = Open.sort_stream(stream)
|
45
|
+
# stream.annotate sorted if stream.respond_to? :annotate
|
46
|
+
# sorted
|
47
|
+
# end if sort
|
48
|
+
#
|
49
|
+
# lines = []
|
50
|
+
# fields = []
|
51
|
+
# sizes = []
|
52
|
+
# key_fields = []
|
53
|
+
# input_options = []
|
54
|
+
# empty = []
|
55
|
+
# preambles = []
|
56
|
+
#
|
57
|
+
# streams = streams.collect do |stream|
|
58
|
+
#
|
59
|
+
# parser = TSV::Parser.new stream, **options.dup
|
60
|
+
# sfields = parser.fields
|
61
|
+
#
|
62
|
+
# if field_prefix
|
63
|
+
# index = streams.index stream
|
64
|
+
# prefix = field_prefix[index]
|
65
|
+
#
|
66
|
+
# sfields = sfields.collect{|f| [prefix, f] * ":" }
|
67
|
+
# end
|
68
|
+
#
|
69
|
+
# first_line = parser.first_line
|
70
|
+
# first_line = nil if first_line == ""
|
71
|
+
#
|
72
|
+
# lines << first_line
|
73
|
+
# key_fields << parser.key_field
|
74
|
+
# fields << sfields
|
75
|
+
# sizes << sfields.length if sfields
|
76
|
+
# input_options << parser.options
|
77
|
+
# preambles << parser.preamble if preamble and not parser.preamble.empty?
|
78
|
+
#
|
79
|
+
# stream = if fix_flat and parser.type == :flat and first_line
|
80
|
+
# parts = lines[-1].nil? ? [] : lines[-1].split("\t")
|
81
|
+
# lines[-1] = [parts[0], (parts[1..-1] || [])*"|"] * "\t"
|
82
|
+
# TSV.stream_flat2double(parser.stream, :noheader => true).stream
|
83
|
+
# else
|
84
|
+
# parser.stream
|
85
|
+
# end
|
86
|
+
#
|
87
|
+
# empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
88
|
+
#
|
89
|
+
# stream
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# all_fields = fields
|
93
|
+
# key_field = key_fields.compact.first
|
94
|
+
# if same_fields
|
95
|
+
# fields = fields.first
|
96
|
+
# else
|
97
|
+
# fields = fields.compact.flatten
|
98
|
+
# end
|
99
|
+
# options = options.merge(input_options.first || {})
|
100
|
+
# options[:type] = :list if options[:type] == :single
|
101
|
+
# options[:type] = :double if fix_flat
|
102
|
+
#
|
103
|
+
# preamble_txt = case preamble
|
104
|
+
# when TrueClass
|
105
|
+
# preambles * "\n"
|
106
|
+
# when String
|
107
|
+
# if preamble[0] == '+'
|
108
|
+
# preambles * "\n" + "\n" + preamble[1..-1]
|
109
|
+
# else
|
110
|
+
# preamble
|
111
|
+
# end
|
112
|
+
# else
|
113
|
+
# nil
|
114
|
+
# end
|
115
|
+
#
|
116
|
+
# header ||= TSV.header_lines(key_field, fields, options.merge(:preamble => preamble_txt))
|
117
|
+
# sin.puts header
|
118
|
+
#
|
119
|
+
# empty_pos = empty.collect{|stream| streams.index stream }
|
120
|
+
# empty_pos.sort.reverse.each do |i|
|
121
|
+
# key_fields.delete_at i
|
122
|
+
# input_options.delete_at i
|
123
|
+
# end
|
124
|
+
#
|
125
|
+
# begin
|
126
|
+
# done_streams = []
|
127
|
+
#
|
128
|
+
# keys = []
|
129
|
+
# parts = []
|
130
|
+
# lines.each_with_index do |line,i|
|
131
|
+
# if line.nil? || line.empty?
|
132
|
+
# keys[i] = nil
|
133
|
+
# parts[i] = nil
|
134
|
+
# else
|
135
|
+
# vs = line.chomp.split(sep, -1)
|
136
|
+
# key, *p = vs
|
137
|
+
# keys[i] = key
|
138
|
+
# parts[i] = p
|
139
|
+
# end
|
140
|
+
# sizes[i] ||= parts[i].length-1 unless parts[i].nil?
|
141
|
+
# end
|
142
|
+
#
|
143
|
+
# last_min = nil
|
144
|
+
# while lines.compact.any?
|
145
|
+
# min = keys.compact.sort.first
|
146
|
+
# break if min.nil?
|
147
|
+
# str = []
|
148
|
+
#
|
149
|
+
# skip = all_match && keys.uniq != [min]
|
150
|
+
#
|
151
|
+
# keys.each_with_index do |key,i|
|
152
|
+
# case key
|
153
|
+
# when min
|
154
|
+
# str << parts[i] * sep
|
155
|
+
#
|
156
|
+
# begin
|
157
|
+
# line = lines[i] = begin
|
158
|
+
# streams[i].gets
|
159
|
+
# rescue
|
160
|
+
# Log.exception $!
|
161
|
+
# nil
|
162
|
+
# end
|
163
|
+
# if line.nil?
|
164
|
+
# stream = streams[i]
|
165
|
+
# keys[i] = nil
|
166
|
+
# parts[i] = nil
|
167
|
+
# else
|
168
|
+
# k, *p = line.chomp.split(sep, -1)
|
169
|
+
# raise TryAgain if k == keys[i]
|
170
|
+
# keys[i] = k
|
171
|
+
# parts[i] = p.collect{|e| e.nil? ? "" : e }
|
172
|
+
# end
|
173
|
+
# rescue TryAgain
|
174
|
+
# Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
175
|
+
# retry
|
176
|
+
# end
|
177
|
+
# else
|
178
|
+
# if sizes[i] and sizes[i] > 0
|
179
|
+
# p = sep * (sizes[i]-1)
|
180
|
+
# str << p
|
181
|
+
# end
|
182
|
+
# end
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# next if skip
|
186
|
+
#
|
187
|
+
# if same_fields
|
188
|
+
#
|
189
|
+
# values = nil
|
190
|
+
# str.each do |part|
|
191
|
+
# next if part.nil? or part.empty?
|
192
|
+
# _p = part.split(sep,-1)
|
193
|
+
# if values.nil?
|
194
|
+
# values = _p.collect{|v| [v]}
|
195
|
+
# else
|
196
|
+
# _p.each_with_index{|v,i| values[i] ||= []; values[i] << v}
|
197
|
+
# end
|
198
|
+
# end
|
199
|
+
#
|
200
|
+
# values = [[]] * str.length if values.nil?
|
201
|
+
# values = values.collect{|list| list * "|" } * sep
|
202
|
+
#
|
203
|
+
# else
|
204
|
+
# values = str.inject(nil) do |acc,part|
|
205
|
+
# if acc.nil?
|
206
|
+
# acc = part.dup
|
207
|
+
# else
|
208
|
+
# acc << sep << part
|
209
|
+
# end
|
210
|
+
# acc
|
211
|
+
# end
|
212
|
+
# end
|
213
|
+
# text = [min, values] * sep
|
214
|
+
# sin.puts text
|
215
|
+
# end
|
216
|
+
#
|
217
|
+
# streams.each do |stream|
|
218
|
+
# stream.join if stream.respond_to? :join
|
219
|
+
# end
|
220
|
+
# rescue Aborted
|
221
|
+
# Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
222
|
+
# streams.each do |stream|
|
223
|
+
# stream.abort if stream.respond_to? :abort
|
224
|
+
# end
|
225
|
+
# raise $!
|
226
|
+
# rescue Exception
|
227
|
+
# Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
228
|
+
# streams.each do |stream|
|
229
|
+
# stream.abort if stream.respond_to? :abort
|
230
|
+
# end
|
231
|
+
# raise $!
|
232
|
+
# end
|
233
|
+
# end
|
234
|
+
#
|
235
|
+
# out
|
236
|
+
# end
|
237
|
+
|
238
|
+
# def self.stream_flat2double(stream, options = {})
|
239
|
+
# noheader = IndiferentHash.process_options options, :noheader
|
240
|
+
# parser = TSV::Parser.new TSV.get_stream(stream), :type => :flat
|
241
|
+
# dumper_options = parser.options.merge(options).merge(:type => :double)
|
242
|
+
# dumper = TSV::Dumper.new dumper_options
|
243
|
+
# dumper.init unless noheader
|
244
|
+
# TSV.traverse parser, :into => dumper do |key,values|
|
245
|
+
# key = key.first if Array === key
|
246
|
+
# values = [values] unless Array === values
|
247
|
+
# [key, [values.flatten]]
|
248
|
+
# end
|
249
|
+
# dumper
|
250
|
+
# end
|
252
251
|
|
253
252
|
|
254
253
|
def self.reorder_stream(stream, positions, sep = "\t")
|
255
|
-
|
254
|
+
Open.open_pipe do |sin|
|
256
255
|
line = stream.gets
|
257
256
|
line.chomp! unless line.nil?
|
258
257
|
|
@@ -296,19 +295,22 @@ module TSV
|
|
296
295
|
|
297
296
|
|
298
297
|
def self.reorder_stream_tsv(stream, key_field, fields=nil, zipped = true, bar = nil)
|
299
|
-
parser = TSV::Parser.new TSV.get_stream(stream)
|
298
|
+
parser = TSV::Parser.new TSV.get_stream(stream)
|
300
299
|
dumper_options = parser.options
|
300
|
+
dumper_options[:key_field] = key_field
|
301
|
+
dumper_options[:fields] = fields if fields
|
301
302
|
dumper = TSV::Dumper.new dumper_options
|
302
303
|
dumper.init
|
303
304
|
case parser.type
|
304
305
|
when :single
|
305
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
306
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
306
307
|
key = keys.first
|
307
308
|
[key, [values]]
|
308
309
|
end
|
309
310
|
when :double
|
310
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
311
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
311
312
|
res = []
|
313
|
+
keys = [keys] unless Array === keys
|
312
314
|
keys.each_with_index do |key,i|
|
313
315
|
vs = zipped ? values.collect{|l| l.length == 1 ? l : [l[i]] } : values
|
314
316
|
res << [key, vs]
|
@@ -317,12 +319,12 @@ module TSV
|
|
317
319
|
res
|
318
320
|
end
|
319
321
|
when :list
|
320
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
322
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
321
323
|
key = keys === Array ? keys.first : keys
|
322
324
|
[key, values]
|
323
325
|
end
|
324
326
|
when :flat
|
325
|
-
TSV.traverse parser, :into => dumper, :bar => bar do |keys,values|
|
327
|
+
TSV.traverse parser, :key_field => key_field, :fields => fields, :into => dumper, :bar => bar do |keys,values|
|
326
328
|
key = keys === Array ? keys.first : keys
|
327
329
|
[key, values]
|
328
330
|
end
|