rbbt-util 5.44.1 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/bin/rbbt +67 -90
- data/bin/rbbt_exec.rb +2 -2
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/filecache.rb +1 -1
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +2 -2
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +92 -105
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
- data/lib/rbbt/workflow/refactor.rb +150 -0
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +96 -4
- data/python/rbbt/workflow/remote.py +104 -0
- data/python/rbbt/workflow.py +64 -0
- data/python/test.py +10 -0
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/retry +43 -0
- data/share/rbbt_commands/workflow/server +12 -2
- data/share/rbbt_commands/workflow/task +80 -73
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +45 -6
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -1,420 +1,420 @@
|
|
1
|
-
require 'rbbt/resource/path'
|
2
|
-
module TSV
|
3
|
-
|
4
|
-
def self.stream_column(file, column)
|
5
|
-
header = TSV.parse_header(file)
|
6
|
-
pos = header.fields.index(column) + 1
|
7
|
-
sep2 = header.options[:sep2] || "|"
|
8
|
-
case header.type.to_s
|
9
|
-
when nil, "double"
|
10
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
11
|
-
next if line =~ /^#/
|
12
|
-
line.split("\t")[pos].gsub(sep2, "\n")
|
13
|
-
end
|
14
|
-
when "single"
|
15
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
16
|
-
next if line =~ /^#/
|
17
|
-
line.split("\t")[1]
|
18
|
-
end
|
19
|
-
when "flat"
|
20
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
21
|
-
next if line =~ /^#/
|
22
|
-
line.split("\t")[1..-1] * "\n"
|
23
|
-
end
|
24
|
-
when 'list'
|
25
|
-
TSV.traverse file, :type => :array, :into => :stream do |line|
|
26
|
-
next if line =~ /^#/
|
27
|
-
line.split("\t")[pos]
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def self.guess_id(identifier_file, values, options = {})
|
33
|
-
field_matches = TSV.field_match_counts(identifier_file, values, options)
|
34
|
-
field_matches.sort_by{|field, count| count.to_i}.last
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.field_match_counts(file, values, options = {})
|
38
|
-
options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
|
39
|
-
persist_options = Misc.pull_keys options, :persist
|
40
|
-
|
41
|
-
filename = TSV === file ? file.filename : file
|
42
|
-
path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
|
43
|
-
tsv = TSV === file ? file : TSV.open(file, options)
|
44
|
-
|
45
|
-
text = ""
|
46
|
-
fields = nil
|
47
|
-
tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
48
|
-
names.zip(fields).each do |list, format|
|
49
|
-
list = [list] unless Array === list
|
50
|
-
list.delete_if do |name| name.empty? end
|
51
|
-
next if list.empty?
|
52
|
-
text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
53
|
-
end
|
54
|
-
text << [gene, tsv.key_field] * "\t" << "\n"
|
55
|
-
end
|
56
|
-
text
|
57
|
-
end
|
58
|
-
|
59
|
-
TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
60
|
-
cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
61
|
-
begin
|
62
|
-
TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
|
63
|
-
rescue
|
64
|
-
Log.exception $!
|
65
|
-
TSV.setup({}, :type => :single, :cast => :to_i)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.get_filename(file)
|
71
|
-
case
|
72
|
-
when (defined? Step and Step === file)
|
73
|
-
file.path
|
74
|
-
when Path === file
|
75
|
-
file
|
76
|
-
when (String === file and (Open.exists? file or Open.remote? file))
|
77
|
-
file
|
78
|
-
when String === file
|
79
|
-
"String-#{Misc.digest file}"
|
80
|
-
when file.respond_to?(:filename)
|
81
|
-
file.filename
|
82
|
-
when file.respond_to?(:gets)
|
83
|
-
nil
|
84
|
-
else
|
85
|
-
raise "Cannot get filename from: #{file.inspect}"
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def self.abort_stream(file, exception = nil)
|
90
|
-
return if file.nil?
|
91
|
-
if defined? Step and Step === file
|
92
|
-
if exception
|
93
|
-
file.exception exception
|
94
|
-
else
|
95
|
-
if not (file.aborted? or file.done?)
|
96
|
-
file.abort
|
97
|
-
end
|
98
|
-
end
|
99
|
-
elsif Hash === file or Array === file
|
100
|
-
return
|
101
|
-
else
|
102
|
-
stream = get_stream(file)
|
103
|
-
stream.abort(exception) if stream.respond_to? :abort
|
104
|
-
AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.get_stream(file, open_options = {})
|
109
|
-
case file
|
110
|
-
when Zlib::GzipReader
|
111
|
-
file
|
112
|
-
when (defined? Bgzf and Bgzf)
|
113
|
-
file
|
114
|
-
when TSV
|
115
|
-
file.dumper_stream
|
116
|
-
when TSV::Dumper
|
117
|
-
file.stream
|
118
|
-
when TSV::Parser
|
119
|
-
file.stream
|
120
|
-
when Path
|
121
|
-
file.open(open_options)
|
122
|
-
when (defined? Tempfile and Tempfile)
|
123
|
-
begin
|
124
|
-
pos = file.pos
|
125
|
-
file.rewind if file.respond_to?(:rewind) and pos != 0
|
126
|
-
rescue Exception
|
127
|
-
end
|
128
|
-
file
|
129
|
-
when IO, StringIO, File
|
130
|
-
begin
|
131
|
-
pos = file.pos
|
132
|
-
file.rewind if file.respond_to?(:rewind) and pos != 0
|
133
|
-
rescue
|
134
|
-
end
|
135
|
-
file
|
136
|
-
when String
|
137
|
-
if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
|
138
|
-
Open.open(file, open_options)
|
139
|
-
else
|
140
|
-
StringIO.new file
|
141
|
-
end
|
142
|
-
when (defined? Step and Step)
|
143
|
-
if file.respond_to?(:base_url)
|
144
|
-
if file.result and IO === file.result
|
145
|
-
file.result
|
146
|
-
else
|
147
|
-
file.join
|
148
|
-
get_stream(file.path, open_options.merge(:nocache => true))
|
149
|
-
end
|
150
|
-
else
|
151
|
-
file.grace
|
152
|
-
|
153
|
-
stream = file.get_stream
|
154
|
-
if stream && ! stream.closed?
|
155
|
-
stream
|
156
|
-
else
|
157
|
-
file.join
|
158
|
-
raise "Aborted stream from Step #{file.path}" if file.aborted?
|
159
|
-
raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
|
160
|
-
get_stream(file.path, open_options)
|
161
|
-
end
|
162
|
-
end
|
163
|
-
when Array
|
164
|
-
Misc.open_pipe do |sin|
|
165
|
-
file.each do |l|
|
166
|
-
sin.puts l
|
167
|
-
end
|
168
|
-
end
|
169
|
-
when Set
|
170
|
-
get_stream(file.to_a, open_options)
|
171
|
-
when Enumerable
|
172
|
-
file
|
173
|
-
else
|
174
|
-
raise "Cannot get stream from: #{file.inspect}"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
def self.identify_field(key_field, fields, field)
|
179
|
-
case field
|
180
|
-
when nil
|
181
|
-
:key
|
182
|
-
when Symbol
|
183
|
-
field == :key ? field : identify_field(key_field, fields, field.to_s)
|
184
|
-
when Integer
|
185
|
-
field
|
186
|
-
when (fields.nil? and String)
|
187
|
-
raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
|
188
|
-
identify_field(key_field, fields, field.to_i)
|
189
|
-
when String
|
190
|
-
return :key if key_field == field
|
191
|
-
pos = fields.index field
|
192
|
-
return pos if pos
|
193
|
-
return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
|
194
|
-
if fields.select{|f| f.include?("(") }.any?
|
195
|
-
simplify_fields = fields.collect do |f|
|
196
|
-
if m = f.match(/(.*)\s+\(.*\)/)
|
197
|
-
m[1]
|
198
|
-
else
|
199
|
-
f
|
200
|
-
end
|
201
|
-
end
|
202
|
-
return identify_field(key_field, simplify_fields, field)
|
203
|
-
end
|
204
|
-
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
|
205
|
-
else
|
206
|
-
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
def self.header_lines(key_field, fields, entry_hash = nil)
|
213
|
-
if Hash === entry_hash
|
214
|
-
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
215
|
-
preamble = entry_hash[:preamble]
|
216
|
-
header_hash = entry_hash[:header_hash]
|
217
|
-
end
|
218
|
-
|
219
|
-
header_hash = "#" if header_hash.nil?
|
220
|
-
|
221
|
-
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
222
|
-
|
223
|
-
str = ""
|
224
|
-
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
225
|
-
if fields
|
226
|
-
if fields.empty?
|
227
|
-
str << header_hash << (key_field || "ID").to_s << "\n"
|
228
|
-
else
|
229
|
-
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
230
|
-
end
|
231
|
-
end
|
232
|
-
|
233
|
-
str
|
234
|
-
end
|
235
|
-
|
236
|
-
def identify_field(field)
|
237
|
-
TSV.identify_field(key_field, fields, field)
|
238
|
-
end
|
239
|
-
|
240
|
-
def rename_field(field, new)
|
241
|
-
self.fields = self.fields.collect{|f| f == field ? new : f }
|
242
|
-
self
|
243
|
-
end
|
244
|
-
|
245
|
-
def unzip_replicates
|
246
|
-
raise "Can only unzip replicates in :double TSVs" unless type == :double
|
247
|
-
|
248
|
-
new = {}
|
249
|
-
self.with_unnamed do
|
250
|
-
through do |k,vs|
|
251
|
-
Misc.zip_fields(vs).each_with_index do |v,i|
|
252
|
-
new[k + "(#{i})"] = v
|
253
|
-
end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
self.annotate(new)
|
258
|
-
new.type = :list
|
259
|
-
|
260
|
-
new
|
261
|
-
end
|
262
|
-
|
263
|
-
def to_list(&block)
|
264
|
-
new = {}
|
265
|
-
case type
|
266
|
-
when :double
|
267
|
-
if block_given?
|
268
|
-
through do |k,v|
|
269
|
-
if block.arity == 1
|
270
|
-
new[k] = v.collect{|e| yield e}
|
271
|
-
else
|
272
|
-
new[k] = yield k, v
|
273
|
-
end
|
274
|
-
end
|
275
|
-
else
|
276
|
-
through do |k,v|
|
277
|
-
new[k] = v.collect{|e| e.first}
|
278
|
-
end
|
279
|
-
end
|
280
|
-
when :flat
|
281
|
-
through do |k,v|
|
282
|
-
new[k] = [v.first]
|
283
|
-
end
|
284
|
-
when :single
|
285
|
-
through do |k,v|
|
286
|
-
new[k] = [v]
|
287
|
-
end
|
288
|
-
when :list
|
289
|
-
return self
|
290
|
-
end
|
291
|
-
self.annotate(new)
|
292
|
-
new.type = :list
|
293
|
-
new
|
294
|
-
end
|
295
|
-
|
296
|
-
def to_double
|
297
|
-
new = {}
|
298
|
-
case type
|
299
|
-
when :double
|
300
|
-
return self
|
301
|
-
when :flat
|
302
|
-
through do |k,v|
|
303
|
-
new[k] = v.nil? ? [] : [v]
|
304
|
-
end
|
305
|
-
when :single
|
306
|
-
through do |k,v|
|
307
|
-
new[k] = v.nil? ? [[]] : [[v]]
|
308
|
-
end
|
309
|
-
when :list
|
310
|
-
if block_given?
|
311
|
-
through do |k,v|
|
312
|
-
if v.nil?
|
313
|
-
new[k] = nil
|
314
|
-
else
|
315
|
-
new[k] = v.collect{|e| yield e}
|
316
|
-
end
|
317
|
-
end
|
318
|
-
else
|
319
|
-
through do |k,v|
|
320
|
-
if v.nil?
|
321
|
-
new[k] = nil
|
322
|
-
else
|
323
|
-
new[k] = v.collect{|e| [e]}
|
324
|
-
end
|
325
|
-
end
|
326
|
-
end
|
327
|
-
end
|
328
|
-
self.annotate(new)
|
329
|
-
new.type = :double
|
330
|
-
new
|
331
|
-
end
|
332
|
-
|
333
|
-
def to_flat(field = nil)
|
334
|
-
new = {}
|
335
|
-
case type
|
336
|
-
when :double
|
337
|
-
if field.nil?
|
338
|
-
through do |k,v| new[k] = v.first end
|
339
|
-
elsif field == :all
|
340
|
-
through do |k,v| new[k] = v.flatten.compact end
|
341
|
-
else
|
342
|
-
pos = identify_field field
|
343
|
-
through do |k,v| new[k] = v[pos] end
|
344
|
-
end
|
345
|
-
when :flat
|
346
|
-
return self
|
347
|
-
when :single
|
348
|
-
through do |k,v|
|
349
|
-
new[k] = [v]
|
350
|
-
end
|
351
|
-
when :list
|
352
|
-
through do |k,v|
|
353
|
-
new[k] = [v.first]
|
354
|
-
end
|
355
|
-
end
|
356
|
-
self.annotate(new)
|
357
|
-
if new.fields
|
358
|
-
case field
|
359
|
-
when nil
|
360
|
-
new.fields = new.fields[0..0]
|
361
|
-
when :all
|
362
|
-
new.fields = [new.fields * "+"]
|
363
|
-
else
|
364
|
-
new.fields = [field]
|
365
|
-
end
|
366
|
-
end
|
367
|
-
new.type = :flat
|
368
|
-
new
|
369
|
-
end
|
370
|
-
|
371
|
-
def to_single
|
372
|
-
new = {}
|
373
|
-
|
374
|
-
if block_given?
|
375
|
-
through do |k,v|
|
376
|
-
new[k] = yield v
|
377
|
-
end
|
378
|
-
else
|
379
|
-
case type
|
380
|
-
when :double
|
381
|
-
through do |k,v|
|
382
|
-
new[k] = v.first.first
|
383
|
-
end
|
384
|
-
when :flat
|
385
|
-
through do |k,v|
|
386
|
-
new[k] = v.first
|
387
|
-
end
|
388
|
-
when :single
|
389
|
-
return self
|
390
|
-
when :list
|
391
|
-
through do |k,v|
|
392
|
-
new[k] = v.nil? ? nil : v.first
|
393
|
-
end
|
394
|
-
end
|
395
|
-
end
|
396
|
-
|
397
|
-
self.annotate(new)
|
398
|
-
new.type = :single
|
399
|
-
new.fields = [new.fields.first] if new.fields.length > 1
|
400
|
-
new
|
401
|
-
end
|
402
|
-
|
403
|
-
|
404
|
-
def to_onehot(boolean = false)
|
405
|
-
all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
|
406
|
-
index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
|
407
|
-
index.cast = :to_i unless boolean
|
408
|
-
through do |key,values|
|
409
|
-
v = all_values.collect{|_v| values.include?(_v)}
|
410
|
-
v = v.collect{|_v| _v ? 1 : 0 } unless boolean
|
411
|
-
index[key] = v
|
412
|
-
end
|
413
|
-
index
|
414
|
-
end
|
415
|
-
|
416
|
-
def merge(other)
|
417
|
-
self.annotate(super(other))
|
418
|
-
end
|
419
|
-
end
|
420
|
-
|
1
|
+
#require 'rbbt/resource/path'
|
2
|
+
#module TSV
|
3
|
+
#
|
4
|
+
# def self.stream_column(file, column)
|
5
|
+
# header = TSV.parse_header(file)
|
6
|
+
# pos = header.fields.index(column) + 1
|
7
|
+
# sep2 = header.options[:sep2] || "|"
|
8
|
+
# case header.type.to_s
|
9
|
+
# when nil, "double"
|
10
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
11
|
+
# next if line =~ /^#/
|
12
|
+
# line.split("\t")[pos].gsub(sep2, "\n")
|
13
|
+
# end
|
14
|
+
# when "single"
|
15
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
16
|
+
# next if line =~ /^#/
|
17
|
+
# line.split("\t")[1]
|
18
|
+
# end
|
19
|
+
# when "flat"
|
20
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
21
|
+
# next if line =~ /^#/
|
22
|
+
# line.split("\t")[1..-1] * "\n"
|
23
|
+
# end
|
24
|
+
# when 'list'
|
25
|
+
# TSV.traverse file, :type => :array, :into => :stream do |line|
|
26
|
+
# next if line =~ /^#/
|
27
|
+
# line.split("\t")[pos]
|
28
|
+
# end
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# def self.guess_id(identifier_file, values, options = {})
|
33
|
+
# field_matches = TSV.field_match_counts(identifier_file, values, options)
|
34
|
+
# field_matches.sort_by{|field, count| count.to_i}.last
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# def self.field_match_counts(file, values, options = {})
|
38
|
+
# options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
|
39
|
+
# persist_options = IndiferentHash.pull_keys options, :persist
|
40
|
+
#
|
41
|
+
# filename = TSV === file ? file.filename : file
|
42
|
+
# path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
|
43
|
+
# tsv = TSV === file ? file : TSV.open(file, options)
|
44
|
+
#
|
45
|
+
# text = ""
|
46
|
+
# fields = nil
|
47
|
+
# tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
48
|
+
# names.zip(fields).each do |list, format|
|
49
|
+
# list = [list] unless Array === list
|
50
|
+
# list.delete_if do |name| name.empty? end
|
51
|
+
# next if list.empty?
|
52
|
+
# text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
53
|
+
# end
|
54
|
+
# text << [gene, tsv.key_field] * "\t" << "\n"
|
55
|
+
# end
|
56
|
+
# text
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
# TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
60
|
+
# cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
61
|
+
# begin
|
62
|
+
# TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
|
63
|
+
# rescue
|
64
|
+
# Log.exception $!
|
65
|
+
# TSV.setup({}, :type => :single, :cast => :to_i)
|
66
|
+
# end
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
#
|
70
|
+
# def self.get_filename(file)
|
71
|
+
# case
|
72
|
+
# when (defined? Step and Step === file)
|
73
|
+
# file.path
|
74
|
+
# when Path === file
|
75
|
+
# file
|
76
|
+
# when (String === file and (Open.exists? file or Open.remote? file))
|
77
|
+
# file
|
78
|
+
# when String === file
|
79
|
+
# "String-#{Misc.digest file}"
|
80
|
+
# when file.respond_to?(:filename)
|
81
|
+
# file.filename
|
82
|
+
# when file.respond_to?(:gets)
|
83
|
+
# nil
|
84
|
+
# else
|
85
|
+
# raise "Cannot get filename from: #{file.inspect}"
|
86
|
+
# end
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
# def self.abort_stream(file, exception = nil)
|
90
|
+
# return if file.nil?
|
91
|
+
# if defined? Step and Step === file
|
92
|
+
# if exception
|
93
|
+
# file.exception exception
|
94
|
+
# else
|
95
|
+
# if not (file.aborted? or file.done?)
|
96
|
+
# file.abort
|
97
|
+
# end
|
98
|
+
# end
|
99
|
+
# elsif Hash === file or Array === file
|
100
|
+
# return
|
101
|
+
# else
|
102
|
+
# stream = get_stream(file)
|
103
|
+
# stream.abort(exception) if stream.respond_to? :abort
|
104
|
+
# AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
|
105
|
+
# end
|
106
|
+
# end
|
107
|
+
#
|
108
|
+
# def self.get_stream(file, open_options = {})
|
109
|
+
# case file
|
110
|
+
# when Zlib::GzipReader
|
111
|
+
# file
|
112
|
+
# when (defined? Bgzf and Bgzf)
|
113
|
+
# file
|
114
|
+
# when TSV
|
115
|
+
# file.dumper_stream
|
116
|
+
# when TSV::Dumper
|
117
|
+
# file.stream
|
118
|
+
# when TSV::Parser
|
119
|
+
# file.stream
|
120
|
+
# when Path
|
121
|
+
# file.open(open_options)
|
122
|
+
# when (defined? Tempfile and Tempfile)
|
123
|
+
# begin
|
124
|
+
# pos = file.pos
|
125
|
+
# file.rewind if file.respond_to?(:rewind) and pos != 0
|
126
|
+
# rescue Exception
|
127
|
+
# end
|
128
|
+
# file
|
129
|
+
# when IO, StringIO, File
|
130
|
+
# begin
|
131
|
+
# pos = file.pos
|
132
|
+
# file.rewind if file.respond_to?(:rewind) and pos != 0
|
133
|
+
# rescue
|
134
|
+
# end
|
135
|
+
# file
|
136
|
+
# when String
|
137
|
+
# if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
|
138
|
+
# Open.open(file, open_options)
|
139
|
+
# else
|
140
|
+
# StringIO.new file
|
141
|
+
# end
|
142
|
+
# when (defined? Step and Step)
|
143
|
+
# if file.respond_to?(:base_url)
|
144
|
+
# if file.result and IO === file.result
|
145
|
+
# file.result
|
146
|
+
# else
|
147
|
+
# file.join
|
148
|
+
# get_stream(file.path, open_options.merge(:nocache => true))
|
149
|
+
# end
|
150
|
+
# else
|
151
|
+
# file.grace
|
152
|
+
#
|
153
|
+
# stream = file.get_stream
|
154
|
+
# if stream && ! stream.closed?
|
155
|
+
# stream
|
156
|
+
# else
|
157
|
+
# file.join
|
158
|
+
# raise "Aborted stream from Step #{file.path}" if file.aborted?
|
159
|
+
# raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
|
160
|
+
# get_stream(file.path, open_options)
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
# when Array
|
164
|
+
# Misc.open_pipe do |sin|
|
165
|
+
# file.each do |l|
|
166
|
+
# sin.puts l
|
167
|
+
# end
|
168
|
+
# end
|
169
|
+
# when Set
|
170
|
+
# get_stream(file.to_a, open_options)
|
171
|
+
# when Enumerable
|
172
|
+
# file
|
173
|
+
# else
|
174
|
+
# raise "Cannot get stream from: #{file.inspect}"
|
175
|
+
# end
|
176
|
+
# end
|
177
|
+
#
|
178
|
+
# def self.identify_field(key_field, fields, field)
|
179
|
+
# case field
|
180
|
+
# when nil
|
181
|
+
# :key
|
182
|
+
# when Symbol
|
183
|
+
# field == :key ? field : identify_field(key_field, fields, field.to_s)
|
184
|
+
# when Integer
|
185
|
+
# field
|
186
|
+
# when (fields.nil? and String)
|
187
|
+
# raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
|
188
|
+
# identify_field(key_field, fields, field.to_i)
|
189
|
+
# when String
|
190
|
+
# return :key if key_field == field
|
191
|
+
# pos = fields.index field
|
192
|
+
# return pos if pos
|
193
|
+
# return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
|
194
|
+
# if fields.select{|f| f.include?("(") }.any?
|
195
|
+
# simplify_fields = fields.collect do |f|
|
196
|
+
# if m = f.match(/(.*)\s+\(.*\)/)
|
197
|
+
# m[1]
|
198
|
+
# else
|
199
|
+
# f
|
200
|
+
# end
|
201
|
+
# end
|
202
|
+
# return identify_field(key_field, simplify_fields, field)
|
203
|
+
# end
|
204
|
+
# raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
|
205
|
+
# else
|
206
|
+
# raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
|
207
|
+
# end
|
208
|
+
# end
|
209
|
+
#
|
210
|
+
#
|
211
|
+
#
|
212
|
+
# def self.header_lines(key_field, fields, entry_hash = nil)
|
213
|
+
# if Hash === entry_hash
|
214
|
+
# sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
215
|
+
# preamble = entry_hash[:preamble]
|
216
|
+
# header_hash = entry_hash[:header_hash]
|
217
|
+
# end
|
218
|
+
#
|
219
|
+
# header_hash = "#" if header_hash.nil?
|
220
|
+
#
|
221
|
+
# preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
222
|
+
#
|
223
|
+
# str = ""
|
224
|
+
# str << preamble.strip << "\n" if preamble and not preamble.empty?
|
225
|
+
# if fields
|
226
|
+
# if fields.empty?
|
227
|
+
# str << header_hash << (key_field || "ID").to_s << "\n"
|
228
|
+
# else
|
229
|
+
# str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
230
|
+
# end
|
231
|
+
# end
|
232
|
+
#
|
233
|
+
# str
|
234
|
+
# end
|
235
|
+
#
|
236
|
+
# def identify_field(field)
|
237
|
+
# TSV.identify_field(key_field, fields, field)
|
238
|
+
# end
|
239
|
+
#
|
240
|
+
# def rename_field(field, new)
|
241
|
+
# self.fields = self.fields.collect{|f| f == field ? new : f }
|
242
|
+
# self
|
243
|
+
# end
|
244
|
+
#
|
245
|
+
# def unzip_replicates
|
246
|
+
# raise "Can only unzip replicates in :double TSVs" unless type == :double
|
247
|
+
#
|
248
|
+
# new = {}
|
249
|
+
# self.with_unnamed do
|
250
|
+
# through do |k,vs|
|
251
|
+
# Misc.zip_fields(vs).each_with_index do |v,i|
|
252
|
+
# new[k + "(#{i})"] = v
|
253
|
+
# end
|
254
|
+
# end
|
255
|
+
# end
|
256
|
+
#
|
257
|
+
# self.annotate(new)
|
258
|
+
# new.type = :list
|
259
|
+
#
|
260
|
+
# new
|
261
|
+
# end
|
262
|
+
#
|
263
|
+
# def to_list(&block)
|
264
|
+
# new = {}
|
265
|
+
# case type
|
266
|
+
# when :double
|
267
|
+
# if block_given?
|
268
|
+
# through do |k,v|
|
269
|
+
# if block.arity == 1
|
270
|
+
# new[k] = v.collect{|e| yield e}
|
271
|
+
# else
|
272
|
+
# new[k] = yield k, v
|
273
|
+
# end
|
274
|
+
# end
|
275
|
+
# else
|
276
|
+
# through do |k,v|
|
277
|
+
# new[k] = v.collect{|e| e.first}
|
278
|
+
# end
|
279
|
+
# end
|
280
|
+
# when :flat
|
281
|
+
# through do |k,v|
|
282
|
+
# new[k] = [v.first]
|
283
|
+
# end
|
284
|
+
# when :single
|
285
|
+
# through do |k,v|
|
286
|
+
# new[k] = [v]
|
287
|
+
# end
|
288
|
+
# when :list
|
289
|
+
# return self
|
290
|
+
# end
|
291
|
+
# self.annotate(new)
|
292
|
+
# new.type = :list
|
293
|
+
# new
|
294
|
+
# end
|
295
|
+
#
|
296
|
+
# def to_double
|
297
|
+
# new = {}
|
298
|
+
# case type
|
299
|
+
# when :double
|
300
|
+
# return self
|
301
|
+
# when :flat
|
302
|
+
# through do |k,v|
|
303
|
+
# new[k] = v.nil? ? [] : [v]
|
304
|
+
# end
|
305
|
+
# when :single
|
306
|
+
# through do |k,v|
|
307
|
+
# new[k] = v.nil? ? [[]] : [[v]]
|
308
|
+
# end
|
309
|
+
# when :list
|
310
|
+
# if block_given?
|
311
|
+
# through do |k,v|
|
312
|
+
# if v.nil?
|
313
|
+
# new[k] = nil
|
314
|
+
# else
|
315
|
+
# new[k] = v.collect{|e| yield e}
|
316
|
+
# end
|
317
|
+
# end
|
318
|
+
# else
|
319
|
+
# through do |k,v|
|
320
|
+
# if v.nil?
|
321
|
+
# new[k] = nil
|
322
|
+
# else
|
323
|
+
# new[k] = v.collect{|e| [e]}
|
324
|
+
# end
|
325
|
+
# end
|
326
|
+
# end
|
327
|
+
# end
|
328
|
+
# self.annotate(new)
|
329
|
+
# new.type = :double
|
330
|
+
# new
|
331
|
+
# end
|
332
|
+
#
|
333
|
+
# def to_flat(field = nil)
|
334
|
+
# new = {}
|
335
|
+
# case type
|
336
|
+
# when :double
|
337
|
+
# if field.nil?
|
338
|
+
# through do |k,v| new[k] = v.first end
|
339
|
+
# elsif field == :all
|
340
|
+
# through do |k,v| new[k] = v.flatten.compact end
|
341
|
+
# else
|
342
|
+
# pos = identify_field field
|
343
|
+
# through do |k,v| new[k] = v[pos] end
|
344
|
+
# end
|
345
|
+
# when :flat
|
346
|
+
# return self
|
347
|
+
# when :single
|
348
|
+
# through do |k,v|
|
349
|
+
# new[k] = [v]
|
350
|
+
# end
|
351
|
+
# when :list
|
352
|
+
# through do |k,v|
|
353
|
+
# new[k] = [v.first]
|
354
|
+
# end
|
355
|
+
# end
|
356
|
+
# self.annotate(new)
|
357
|
+
# if new.fields
|
358
|
+
# case field
|
359
|
+
# when nil
|
360
|
+
# new.fields = new.fields[0..0]
|
361
|
+
# when :all
|
362
|
+
# new.fields = [new.fields * "+"]
|
363
|
+
# else
|
364
|
+
# new.fields = [field]
|
365
|
+
# end
|
366
|
+
# end
|
367
|
+
# new.type = :flat
|
368
|
+
# new
|
369
|
+
# end
|
370
|
+
#
|
371
|
+
# def to_single
|
372
|
+
# new = {}
|
373
|
+
#
|
374
|
+
# if block_given?
|
375
|
+
# through do |k,v|
|
376
|
+
# new[k] = yield v
|
377
|
+
# end
|
378
|
+
# else
|
379
|
+
# case type
|
380
|
+
# when :double
|
381
|
+
# through do |k,v|
|
382
|
+
# new[k] = v.first.first
|
383
|
+
# end
|
384
|
+
# when :flat
|
385
|
+
# through do |k,v|
|
386
|
+
# new[k] = v.first
|
387
|
+
# end
|
388
|
+
# when :single
|
389
|
+
# return self
|
390
|
+
# when :list
|
391
|
+
# through do |k,v|
|
392
|
+
# new[k] = v.nil? ? nil : v.first
|
393
|
+
# end
|
394
|
+
# end
|
395
|
+
# end
|
396
|
+
#
|
397
|
+
# self.annotate(new)
|
398
|
+
# new.type = :single
|
399
|
+
# new.fields = [new.fields.first] if new.fields.length > 1
|
400
|
+
# new
|
401
|
+
# end
|
402
|
+
#
|
403
|
+
#
|
404
|
+
# def to_onehot(boolean = false)
|
405
|
+
# all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
|
406
|
+
# index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
|
407
|
+
# index.cast = :to_i unless boolean
|
408
|
+
# through do |key,values|
|
409
|
+
# v = all_values.collect{|_v| values.include?(_v)}
|
410
|
+
# v = v.collect{|_v| _v ? 1 : 0 } unless boolean
|
411
|
+
# index[key] = v
|
412
|
+
# end
|
413
|
+
# index
|
414
|
+
# end
|
415
|
+
#
|
416
|
+
# def merge(other)
|
417
|
+
# self.annotate(super(other))
|
418
|
+
# end
|
419
|
+
#end
|
420
|
+
#
|