rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -1,420 +1,420 @@
|
|
1
|
-
require 'rbbt/resource/path'
|
2
|
-
module TSV
|
3
|
-
|
4
|
-
def self.stream_column(file, column)
|
5
|
-
header = TSV.parse_header(file)
|
6
|
-
pos = header.fields.index(column) + 1
|
7
|
-
sep2 = header.options[:sep2] || "|"
|
8
|
-
case header.type.to_s
|
9
|
-
when nil, "double"
|
10
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
11
|
-
next if line =~ /^#/
|
12
|
-
line.split("\t")[pos].gsub(sep2, "\n")
|
13
|
-
end
|
14
|
-
when "single"
|
15
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
16
|
-
next if line =~ /^#/
|
17
|
-
line.split("\t")[1]
|
18
|
-
end
|
19
|
-
when "flat"
|
20
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
21
|
-
next if line =~ /^#/
|
22
|
-
line.split("\t")[1..-1] * "\n"
|
23
|
-
end
|
24
|
-
when 'list'
|
25
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
26
|
-
next if line =~ /^#/
|
27
|
-
line.split("\t")[pos]
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def self.guess_id(identifier_file, values, options = {})
|
33
|
-
field_matches = TSV.field_match_counts(identifier_file, values, options)
|
34
|
-
field_matches.sort_by{|field, count| count.to_i}.last
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.field_match_counts(file, values, options = {})
|
38
|
-
options =
|
39
|
-
persist_options =
|
40
|
-
|
41
|
-
filename = TSV === file ? file.filename : file
|
42
|
-
path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
|
43
|
-
tsv = TSV === file ? file : TSV.open(file, options)
|
44
|
-
|
45
|
-
text = ""
|
46
|
-
fields = nil
|
47
|
-
tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
48
|
-
names.zip(fields).each do |list, format|
|
49
|
-
list = [list] unless Array === list
|
50
|
-
list.delete_if do |name| name.empty? end
|
51
|
-
next if list.empty?
|
52
|
-
text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
53
|
-
end
|
54
|
-
text << [gene, tsv.key_field] * "\t" << "\n"
|
55
|
-
end
|
56
|
-
text
|
57
|
-
end
|
58
|
-
|
59
|
-
TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
60
|
-
cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
61
|
-
begin
|
62
|
-
TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
|
63
|
-
rescue
|
64
|
-
Log.exception $!
|
65
|
-
TSV.setup({}, :type => :single, :cast => :to_i)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.get_filename(file)
|
71
|
-
case
|
72
|
-
when (defined? Step and Step === file)
|
73
|
-
file.path
|
74
|
-
when Path === file
|
75
|
-
file
|
76
|
-
when (String === file and (Open.exists? file or Open.remote? file))
|
77
|
-
file
|
78
|
-
when String === file
|
79
|
-
"String-#{Misc.digest file}"
|
80
|
-
when file.respond_to?(:filename)
|
81
|
-
file.filename
|
82
|
-
when file.respond_to?(:gets)
|
83
|
-
nil
|
84
|
-
else
|
85
|
-
raise "Cannot get filename from: #{file.inspect}"
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def self.abort_stream(file, exception = nil)
|
90
|
-
return if file.nil?
|
91
|
-
if defined? Step and Step === file
|
92
|
-
if exception
|
93
|
-
file.exception exception
|
94
|
-
else
|
95
|
-
if not (file.aborted? or file.done?)
|
96
|
-
file.abort
|
97
|
-
end
|
98
|
-
end
|
99
|
-
elsif Hash === file or Array === file
|
100
|
-
return
|
101
|
-
else
|
102
|
-
stream = get_stream(file)
|
103
|
-
stream.abort(exception) if stream.respond_to? :abort
|
104
|
-
AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.get_stream(file, open_options = {})
|
109
|
-
case file
|
110
|
-
when Zlib::GzipReader
|
111
|
-
file
|
112
|
-
when (defined? Bgzf and Bgzf)
|
113
|
-
file
|
114
|
-
when TSV
|
115
|
-
file.dumper_stream
|
116
|
-
when TSV::Dumper
|
117
|
-
file.stream
|
118
|
-
when TSV::Parser
|
119
|
-
file.stream
|
120
|
-
when Path
|
121
|
-
file.open(open_options)
|
122
|
-
when (defined? Tempfile and Tempfile)
|
123
|
-
begin
|
124
|
-
pos = file.pos
|
125
|
-
file.rewind if file.respond_to?(:rewind) and pos != 0
|
126
|
-
rescue Exception
|
127
|
-
end
|
128
|
-
file
|
129
|
-
when IO, StringIO, File
|
130
|
-
begin
|
131
|
-
pos = file.pos
|
132
|
-
file.rewind if file.respond_to?(:rewind) and pos != 0
|
133
|
-
rescue
|
134
|
-
end
|
135
|
-
file
|
136
|
-
when String
|
137
|
-
if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
|
138
|
-
Open.open(file, open_options)
|
139
|
-
else
|
140
|
-
StringIO.new file
|
141
|
-
end
|
142
|
-
when (defined? Step and Step)
|
143
|
-
if file.respond_to?(:base_url)
|
144
|
-
if file.result and IO === file.result
|
145
|
-
file.result
|
146
|
-
else
|
147
|
-
file.join
|
148
|
-
get_stream(file.path, open_options.merge(:nocache => true))
|
149
|
-
end
|
150
|
-
else
|
151
|
-
file.grace
|
152
|
-
|
153
|
-
stream = file.get_stream
|
154
|
-
if stream && ! stream.closed?
|
155
|
-
stream
|
156
|
-
else
|
157
|
-
file.join
|
158
|
-
raise "Aborted stream from Step #{file.path}" if file.aborted?
|
159
|
-
raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
|
160
|
-
get_stream(file.path, open_options)
|
161
|
-
end
|
162
|
-
end
|
163
|
-
when Array
|
164
|
-
Misc.open_pipe do |sin|
|
165
|
-
file.each do |l|
|
166
|
-
sin.puts l
|
167
|
-
end
|
168
|
-
end
|
169
|
-
when Set
|
170
|
-
get_stream(file.to_a, open_options)
|
171
|
-
when Enumerable
|
172
|
-
file
|
173
|
-
else
|
174
|
-
raise "Cannot get stream from: #{file.inspect}"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
def self.identify_field(key_field, fields, field)
|
179
|
-
case field
|
180
|
-
when nil
|
181
|
-
:key
|
182
|
-
when Symbol
|
183
|
-
field == :key ? field : identify_field(key_field, fields, field.to_s)
|
184
|
-
when Integer
|
185
|
-
field
|
186
|
-
when (fields.nil? and String)
|
187
|
-
raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
|
188
|
-
identify_field(key_field, fields, field.to_i)
|
189
|
-
when String
|
190
|
-
return :key if key_field == field
|
191
|
-
pos = fields.index field
|
192
|
-
return pos if pos
|
193
|
-
return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
|
194
|
-
if fields.select{|f| f.include?("(") }.any?
|
195
|
-
simplify_fields = fields.collect do |f|
|
196
|
-
if m = f.match(/(.*)\s+\(.*\)/)
|
197
|
-
m[1]
|
198
|
-
else
|
199
|
-
f
|
200
|
-
end
|
201
|
-
end
|
202
|
-
return identify_field(key_field, simplify_fields, field)
|
203
|
-
end
|
204
|
-
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
|
205
|
-
else
|
206
|
-
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
def self.header_lines(key_field, fields, entry_hash = nil)
|
213
|
-
if Hash === entry_hash
|
214
|
-
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
215
|
-
preamble = entry_hash[:preamble]
|
216
|
-
header_hash = entry_hash[:header_hash]
|
217
|
-
end
|
218
|
-
|
219
|
-
header_hash = "#" if header_hash.nil?
|
220
|
-
|
221
|
-
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
222
|
-
|
223
|
-
str = ""
|
224
|
-
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
225
|
-
if fields
|
226
|
-
if fields.empty?
|
227
|
-
str << header_hash << (key_field || "ID").to_s << "\n"
|
228
|
-
else
|
229
|
-
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
230
|
-
end
|
231
|
-
end
|
232
|
-
|
233
|
-
str
|
234
|
-
end
|
235
|
-
|
236
|
-
def identify_field(field)
|
237
|
-
TSV.identify_field(key_field, fields, field)
|
238
|
-
end
|
239
|
-
|
240
|
-
def rename_field(field, new)
|
241
|
-
self.fields = self.fields.collect{|f| f == field ? new : f }
|
242
|
-
self
|
243
|
-
end
|
244
|
-
|
245
|
-
def unzip_replicates
|
246
|
-
raise "Can only unzip replicates in :double TSVs" unless type == :double
|
247
|
-
|
248
|
-
new = {}
|
249
|
-
self.with_unnamed do
|
250
|
-
through do |k,vs|
|
251
|
-
Misc.zip_fields(vs).each_with_index do |v,i|
|
252
|
-
new[k + "(#{i})"] = v
|
253
|
-
end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
self.annotate(new)
|
258
|
-
new.type = :list
|
259
|
-
|
260
|
-
new
|
261
|
-
end
|
262
|
-
|
263
|
-
def to_list(&block)
|
264
|
-
new = {}
|
265
|
-
case type
|
266
|
-
when :double
|
267
|
-
if block_given?
|
268
|
-
through do |k,v|
|
269
|
-
if block.arity == 1
|
270
|
-
new[k] = v.collect{|e| yield e}
|
271
|
-
else
|
272
|
-
new[k] = yield k, v
|
273
|
-
end
|
274
|
-
end
|
275
|
-
else
|
276
|
-
through do |k,v|
|
277
|
-
new[k] = v.collect{|e| e.first}
|
278
|
-
end
|
279
|
-
end
|
280
|
-
when :flat
|
281
|
-
through do |k,v|
|
282
|
-
new[k] = [v.first]
|
283
|
-
end
|
284
|
-
when :single
|
285
|
-
through do |k,v|
|
286
|
-
new[k] = [v]
|
287
|
-
end
|
288
|
-
when :list
|
289
|
-
return self
|
290
|
-
end
|
291
|
-
self.annotate(new)
|
292
|
-
new.type = :list
|
293
|
-
new
|
294
|
-
end
|
295
|
-
|
296
|
-
def to_double
|
297
|
-
new = {}
|
298
|
-
case type
|
299
|
-
when :double
|
300
|
-
return self
|
301
|
-
when :flat
|
302
|
-
through do |k,v|
|
303
|
-
new[k] = v.nil? ? [] : [v]
|
304
|
-
end
|
305
|
-
when :single
|
306
|
-
through do |k,v|
|
307
|
-
new[k] = v.nil? ? [[]] : [[v]]
|
308
|
-
end
|
309
|
-
when :list
|
310
|
-
if block_given?
|
311
|
-
through do |k,v|
|
312
|
-
if v.nil?
|
313
|
-
new[k] = nil
|
314
|
-
else
|
315
|
-
new[k] = v.collect{|e| yield e}
|
316
|
-
end
|
317
|
-
end
|
318
|
-
else
|
319
|
-
through do |k,v|
|
320
|
-
if v.nil?
|
321
|
-
new[k] = nil
|
322
|
-
else
|
323
|
-
new[k] = v.collect{|e| [e]}
|
324
|
-
end
|
325
|
-
end
|
326
|
-
end
|
327
|
-
end
|
328
|
-
self.annotate(new)
|
329
|
-
new.type = :double
|
330
|
-
new
|
331
|
-
end
|
332
|
-
|
333
|
-
def to_flat(field = nil)
|
334
|
-
new = {}
|
335
|
-
case type
|
336
|
-
when :double
|
337
|
-
if field.nil?
|
338
|
-
through do |k,v| new[k] = v.first end
|
339
|
-
elsif field == :all
|
340
|
-
through do |k,v| new[k] = v.flatten.compact end
|
341
|
-
else
|
342
|
-
pos = identify_field field
|
343
|
-
through do |k,v| new[k] = v[pos] end
|
344
|
-
end
|
345
|
-
when :flat
|
346
|
-
return self
|
347
|
-
when :single
|
348
|
-
through do |k,v|
|
349
|
-
new[k] = [v]
|
350
|
-
end
|
351
|
-
when :list
|
352
|
-
through do |k,v|
|
353
|
-
new[k] = [v.first]
|
354
|
-
end
|
355
|
-
end
|
356
|
-
self.annotate(new)
|
357
|
-
if new.fields
|
358
|
-
case field
|
359
|
-
when nil
|
360
|
-
new.fields = new.fields[0..0]
|
361
|
-
when :all
|
362
|
-
new.fields = [new.fields * "+"]
|
363
|
-
else
|
364
|
-
new.fields = [field]
|
365
|
-
end
|
366
|
-
end
|
367
|
-
new.type = :flat
|
368
|
-
new
|
369
|
-
end
|
370
|
-
|
371
|
-
def to_single
|
372
|
-
new = {}
|
373
|
-
|
374
|
-
if block_given?
|
375
|
-
through do |k,v|
|
376
|
-
new[k] = yield v
|
377
|
-
end
|
378
|
-
else
|
379
|
-
case type
|
380
|
-
when :double
|
381
|
-
through do |k,v|
|
382
|
-
new[k] = v.first.first
|
383
|
-
end
|
384
|
-
when :flat
|
385
|
-
through do |k,v|
|
386
|
-
new[k] = v.first
|
387
|
-
end
|
388
|
-
when :single
|
389
|
-
return self
|
390
|
-
when :list
|
391
|
-
through do |k,v|
|
392
|
-
new[k] = v.nil? ? nil : v.first
|
393
|
-
end
|
394
|
-
end
|
395
|
-
end
|
396
|
-
|
397
|
-
self.annotate(new)
|
398
|
-
new.type = :single
|
399
|
-
new.fields = [new.fields.first] if new.fields.length > 1
|
400
|
-
new
|
401
|
-
end
|
402
|
-
|
403
|
-
|
404
|
-
def to_onehot(boolean = false)
|
405
|
-
all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
|
406
|
-
index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
|
407
|
-
index.cast = :to_i unless boolean
|
408
|
-
through do |key,values|
|
409
|
-
v = all_values.collect{|_v| values.include?(_v)}
|
410
|
-
v = v.collect{|_v| _v ? 1 : 0 } unless boolean
|
411
|
-
index[key] = v
|
412
|
-
end
|
413
|
-
index
|
414
|
-
end
|
415
|
-
|
416
|
-
def merge(other)
|
417
|
-
self.annotate(super(other))
|
418
|
-
end
|
419
|
-
end
|
420
|
-
|
1
|
+
#require 'rbbt/resource/path'
|
2
|
+
#module TSV
|
3
|
+
#
|
4
|
+
# def self.stream_column(file, column)
|
5
|
+
# header = TSV.parse_header(file)
|
6
|
+
# pos = header.fields.index(column) + 1
|
7
|
+
# sep2 = header.options[:sep2] || "|"
|
8
|
+
# case header.type.to_s
|
9
|
+
# when nil, "double"
|
10
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
11
|
+
# next if line =~ /^#/
|
12
|
+
# line.split("\t")[pos].gsub(sep2, "\n")
|
13
|
+
# end
|
14
|
+
# when "single"
|
15
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
16
|
+
# next if line =~ /^#/
|
17
|
+
# line.split("\t")[1]
|
18
|
+
# end
|
19
|
+
# when "flat"
|
20
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
21
|
+
# next if line =~ /^#/
|
22
|
+
# line.split("\t")[1..-1] * "\n"
|
23
|
+
# end
|
24
|
+
# when 'list'
|
25
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
26
|
+
# next if line =~ /^#/
|
27
|
+
# line.split("\t")[pos]
|
28
|
+
# end
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# def self.guess_id(identifier_file, values, options = {})
|
33
|
+
# field_matches = TSV.field_match_counts(identifier_file, values, options)
|
34
|
+
# field_matches.sort_by{|field, count| count.to_i}.last
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# def self.field_match_counts(file, values, options = {})
|
38
|
+
# options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
|
39
|
+
# persist_options = IndiferentHash.pull_keys options, :persist
|
40
|
+
#
|
41
|
+
# filename = TSV === file ? file.filename : file
|
42
|
+
# path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
|
43
|
+
# tsv = TSV === file ? file : TSV.open(file, options)
|
44
|
+
#
|
45
|
+
# text = ""
|
46
|
+
# fields = nil
|
47
|
+
# tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
48
|
+
# names.zip(fields).each do |list, format|
|
49
|
+
# list = [list] unless Array === list
|
50
|
+
# list.delete_if do |name| name.empty? end
|
51
|
+
# next if list.empty?
|
52
|
+
# text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
53
|
+
# end
|
54
|
+
# text << [gene, tsv.key_field] * "\t" << "\n"
|
55
|
+
# end
|
56
|
+
# text
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
# TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
60
|
+
# cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
61
|
+
# begin
|
62
|
+
# TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
|
63
|
+
# rescue
|
64
|
+
# Log.exception $!
|
65
|
+
# TSV.setup({}, :type => :single, :cast => :to_i)
|
66
|
+
# end
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
#
|
70
|
+
# def self.get_filename(file)
|
71
|
+
# case
|
72
|
+
# when (defined? Step and Step === file)
|
73
|
+
# file.path
|
74
|
+
# when Path === file
|
75
|
+
# file
|
76
|
+
# when (String === file and (Open.exists? file or Open.remote? file))
|
77
|
+
# file
|
78
|
+
# when String === file
|
79
|
+
# "String-#{Misc.digest file}"
|
80
|
+
# when file.respond_to?(:filename)
|
81
|
+
# file.filename
|
82
|
+
# when file.respond_to?(:gets)
|
83
|
+
# nil
|
84
|
+
# else
|
85
|
+
# raise "Cannot get filename from: #{file.inspect}"
|
86
|
+
# end
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
# def self.abort_stream(file, exception = nil)
|
90
|
+
# return if file.nil?
|
91
|
+
# if defined? Step and Step === file
|
92
|
+
# if exception
|
93
|
+
# file.exception exception
|
94
|
+
# else
|
95
|
+
# if not (file.aborted? or file.done?)
|
96
|
+
# file.abort
|
97
|
+
# end
|
98
|
+
# end
|
99
|
+
# elsif Hash === file or Array === file
|
100
|
+
# return
|
101
|
+
# else
|
102
|
+
# stream = get_stream(file)
|
103
|
+
# stream.abort(exception) if stream.respond_to? :abort
|
104
|
+
# AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
|
105
|
+
# end
|
106
|
+
# end
|
107
|
+
#
|
108
|
+
# def self.get_stream(file, open_options = {})
|
109
|
+
# case file
|
110
|
+
# when Zlib::GzipReader
|
111
|
+
# file
|
112
|
+
# when (defined? Bgzf and Bgzf)
|
113
|
+
# file
|
114
|
+
# when TSV
|
115
|
+
# file.dumper_stream
|
116
|
+
# when TSV::Dumper
|
117
|
+
# file.stream
|
118
|
+
# when TSV::Parser
|
119
|
+
# file.stream
|
120
|
+
# when Path
|
121
|
+
# file.open(open_options)
|
122
|
+
# when (defined? Tempfile and Tempfile)
|
123
|
+
# begin
|
124
|
+
# pos = file.pos
|
125
|
+
# file.rewind if file.respond_to?(:rewind) and pos != 0
|
126
|
+
# rescue Exception
|
127
|
+
# end
|
128
|
+
# file
|
129
|
+
# when IO, StringIO, File
|
130
|
+
# begin
|
131
|
+
# pos = file.pos
|
132
|
+
# file.rewind if file.respond_to?(:rewind) and pos != 0
|
133
|
+
# rescue
|
134
|
+
# end
|
135
|
+
# file
|
136
|
+
# when String
|
137
|
+
# if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
|
138
|
+
# Open.open(file, open_options)
|
139
|
+
# else
|
140
|
+
# StringIO.new file
|
141
|
+
# end
|
142
|
+
# when (defined? Step and Step)
|
143
|
+
# if file.respond_to?(:base_url)
|
144
|
+
# if file.result and IO === file.result
|
145
|
+
# file.result
|
146
|
+
# else
|
147
|
+
# file.join
|
148
|
+
# get_stream(file.path, open_options.merge(:nocache => true))
|
149
|
+
# end
|
150
|
+
# else
|
151
|
+
# file.grace
|
152
|
+
#
|
153
|
+
# stream = file.get_stream
|
154
|
+
# if stream && ! stream.closed?
|
155
|
+
# stream
|
156
|
+
# else
|
157
|
+
# file.join
|
158
|
+
# raise "Aborted stream from Step #{file.path}" if file.aborted?
|
159
|
+
# raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
|
160
|
+
# get_stream(file.path, open_options)
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
# when Array
|
164
|
+
# Misc.open_pipe do |sin|
|
165
|
+
# file.each do |l|
|
166
|
+
# sin.puts l
|
167
|
+
# end
|
168
|
+
# end
|
169
|
+
# when Set
|
170
|
+
# get_stream(file.to_a, open_options)
|
171
|
+
# when Enumerable
|
172
|
+
# file
|
173
|
+
# else
|
174
|
+
# raise "Cannot get stream from: #{file.inspect}"
|
175
|
+
# end
|
176
|
+
# end
|
177
|
+
#
|
178
|
+
# def self.identify_field(key_field, fields, field)
|
179
|
+
# case field
|
180
|
+
# when nil
|
181
|
+
# :key
|
182
|
+
# when Symbol
|
183
|
+
# field == :key ? field : identify_field(key_field, fields, field.to_s)
|
184
|
+
# when Integer
|
185
|
+
# field
|
186
|
+
# when (fields.nil? and String)
|
187
|
+
# raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
|
188
|
+
# identify_field(key_field, fields, field.to_i)
|
189
|
+
# when String
|
190
|
+
# return :key if key_field == field
|
191
|
+
# pos = fields.index field
|
192
|
+
# return pos if pos
|
193
|
+
# return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
|
194
|
+
# if fields.select{|f| f.include?("(") }.any?
|
195
|
+
# simplify_fields = fields.collect do |f|
|
196
|
+
# if m = f.match(/(.*)\s+\(.*\)/)
|
197
|
+
# m[1]
|
198
|
+
# else
|
199
|
+
# f
|
200
|
+
# end
|
201
|
+
# end
|
202
|
+
# return identify_field(key_field, simplify_fields, field)
|
203
|
+
# end
|
204
|
+
# raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
|
205
|
+
# else
|
206
|
+
# raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
|
207
|
+
# end
|
208
|
+
# end
|
209
|
+
#
|
210
|
+
#
|
211
|
+
#
|
212
|
+
# def self.header_lines(key_field, fields, entry_hash = nil)
|
213
|
+
# if Hash === entry_hash
|
214
|
+
# sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
215
|
+
# preamble = entry_hash[:preamble]
|
216
|
+
# header_hash = entry_hash[:header_hash]
|
217
|
+
# end
|
218
|
+
#
|
219
|
+
# header_hash = "#" if header_hash.nil?
|
220
|
+
#
|
221
|
+
# preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
222
|
+
#
|
223
|
+
# str = ""
|
224
|
+
# str << preamble.strip << "\n" if preamble and not preamble.empty?
|
225
|
+
# if fields
|
226
|
+
# if fields.empty?
|
227
|
+
# str << header_hash << (key_field || "ID").to_s << "\n"
|
228
|
+
# else
|
229
|
+
# str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
230
|
+
# end
|
231
|
+
# end
|
232
|
+
#
|
233
|
+
# str
|
234
|
+
# end
|
235
|
+
#
|
236
|
+
# def identify_field(field)
|
237
|
+
# TSV.identify_field(key_field, fields, field)
|
238
|
+
# end
|
239
|
+
#
|
240
|
+
# def rename_field(field, new)
|
241
|
+
# self.fields = self.fields.collect{|f| f == field ? new : f }
|
242
|
+
# self
|
243
|
+
# end
|
244
|
+
#
|
245
|
+
# def unzip_replicates
|
246
|
+
# raise "Can only unzip replicates in :double TSVs" unless type == :double
|
247
|
+
#
|
248
|
+
# new = {}
|
249
|
+
# self.with_unnamed do
|
250
|
+
# through do |k,vs|
|
251
|
+
# Misc.zip_fields(vs).each_with_index do |v,i|
|
252
|
+
# new[k + "(#{i})"] = v
|
253
|
+
# end
|
254
|
+
# end
|
255
|
+
# end
|
256
|
+
#
|
257
|
+
# self.annotate(new)
|
258
|
+
# new.type = :list
|
259
|
+
#
|
260
|
+
# new
|
261
|
+
# end
|
262
|
+
#
|
263
|
+
# def to_list(&block)
|
264
|
+
# new = {}
|
265
|
+
# case type
|
266
|
+
# when :double
|
267
|
+
# if block_given?
|
268
|
+
# through do |k,v|
|
269
|
+
# if block.arity == 1
|
270
|
+
# new[k] = v.collect{|e| yield e}
|
271
|
+
# else
|
272
|
+
# new[k] = yield k, v
|
273
|
+
# end
|
274
|
+
# end
|
275
|
+
# else
|
276
|
+
# through do |k,v|
|
277
|
+
# new[k] = v.collect{|e| e.first}
|
278
|
+
# end
|
279
|
+
# end
|
280
|
+
# when :flat
|
281
|
+
# through do |k,v|
|
282
|
+
# new[k] = [v.first]
|
283
|
+
# end
|
284
|
+
# when :single
|
285
|
+
# through do |k,v|
|
286
|
+
# new[k] = [v]
|
287
|
+
# end
|
288
|
+
# when :list
|
289
|
+
# return self
|
290
|
+
# end
|
291
|
+
# self.annotate(new)
|
292
|
+
# new.type = :list
|
293
|
+
# new
|
294
|
+
# end
|
295
|
+
#
|
296
|
+
# def to_double
|
297
|
+
# new = {}
|
298
|
+
# case type
|
299
|
+
# when :double
|
300
|
+
# return self
|
301
|
+
# when :flat
|
302
|
+
# through do |k,v|
|
303
|
+
# new[k] = v.nil? ? [] : [v]
|
304
|
+
# end
|
305
|
+
# when :single
|
306
|
+
# through do |k,v|
|
307
|
+
# new[k] = v.nil? ? [[]] : [[v]]
|
308
|
+
# end
|
309
|
+
# when :list
|
310
|
+
# if block_given?
|
311
|
+
# through do |k,v|
|
312
|
+
# if v.nil?
|
313
|
+
# new[k] = nil
|
314
|
+
# else
|
315
|
+
# new[k] = v.collect{|e| yield e}
|
316
|
+
# end
|
317
|
+
# end
|
318
|
+
# else
|
319
|
+
# through do |k,v|
|
320
|
+
# if v.nil?
|
321
|
+
# new[k] = nil
|
322
|
+
# else
|
323
|
+
# new[k] = v.collect{|e| [e]}
|
324
|
+
# end
|
325
|
+
# end
|
326
|
+
# end
|
327
|
+
# end
|
328
|
+
# self.annotate(new)
|
329
|
+
# new.type = :double
|
330
|
+
# new
|
331
|
+
# end
|
332
|
+
#
|
333
|
+
# def to_flat(field = nil)
|
334
|
+
# new = {}
|
335
|
+
# case type
|
336
|
+
# when :double
|
337
|
+
# if field.nil?
|
338
|
+
# through do |k,v| new[k] = v.first end
|
339
|
+
# elsif field == :all
|
340
|
+
# through do |k,v| new[k] = v.flatten.compact end
|
341
|
+
# else
|
342
|
+
# pos = identify_field field
|
343
|
+
# through do |k,v| new[k] = v[pos] end
|
344
|
+
# end
|
345
|
+
# when :flat
|
346
|
+
# return self
|
347
|
+
# when :single
|
348
|
+
# through do |k,v|
|
349
|
+
# new[k] = [v]
|
350
|
+
# end
|
351
|
+
# when :list
|
352
|
+
# through do |k,v|
|
353
|
+
# new[k] = [v.first]
|
354
|
+
# end
|
355
|
+
# end
|
356
|
+
# self.annotate(new)
|
357
|
+
# if new.fields
|
358
|
+
# case field
|
359
|
+
# when nil
|
360
|
+
# new.fields = new.fields[0..0]
|
361
|
+
# when :all
|
362
|
+
# new.fields = [new.fields * "+"]
|
363
|
+
# else
|
364
|
+
# new.fields = [field]
|
365
|
+
# end
|
366
|
+
# end
|
367
|
+
# new.type = :flat
|
368
|
+
# new
|
369
|
+
# end
|
370
|
+
#
|
371
|
+
# def to_single
|
372
|
+
# new = {}
|
373
|
+
#
|
374
|
+
# if block_given?
|
375
|
+
# through do |k,v|
|
376
|
+
# new[k] = yield v
|
377
|
+
# end
|
378
|
+
# else
|
379
|
+
# case type
|
380
|
+
# when :double
|
381
|
+
# through do |k,v|
|
382
|
+
# new[k] = v.first.first
|
383
|
+
# end
|
384
|
+
# when :flat
|
385
|
+
# through do |k,v|
|
386
|
+
# new[k] = v.first
|
387
|
+
# end
|
388
|
+
# when :single
|
389
|
+
# return self
|
390
|
+
# when :list
|
391
|
+
# through do |k,v|
|
392
|
+
# new[k] = v.nil? ? nil : v.first
|
393
|
+
# end
|
394
|
+
# end
|
395
|
+
# end
|
396
|
+
#
|
397
|
+
# self.annotate(new)
|
398
|
+
# new.type = :single
|
399
|
+
# new.fields = [new.fields.first] if new.fields.length > 1
|
400
|
+
# new
|
401
|
+
# end
|
402
|
+
#
|
403
|
+
#
|
404
|
+
# def to_onehot(boolean = false)
|
405
|
+
# all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
|
406
|
+
# index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
|
407
|
+
# index.cast = :to_i unless boolean
|
408
|
+
# through do |key,values|
|
409
|
+
# v = all_values.collect{|_v| values.include?(_v)}
|
410
|
+
# v = v.collect{|_v| _v ? 1 : 0 } unless boolean
|
411
|
+
# index[key] = v
|
412
|
+
# end
|
413
|
+
# index
|
414
|
+
# end
|
415
|
+
#
|
416
|
+
# def merge(other)
|
417
|
+
# self.annotate(super(other))
|
418
|
+
# end
|
419
|
+
#end
|
420
|
+
#
|