rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
data/lib/rbbt/tsv/util.rb CHANGED
@@ -1,420 +1,420 @@
1
- require 'rbbt/resource/path'
2
- module TSV
3
-
4
- def self.stream_column(file, column)
5
- header = TSV.parse_header(file)
6
- pos = header.fields.index(column) + 1
7
- sep2 = header.options[:sep2] || "|"
8
- case header.type.to_s
9
- when nil, "double"
10
- TSV.traverse file, :type => :array, :into => :stream do |line|
11
- next if line =~ /^#/
12
- line.split("\t")[pos].gsub(sep2, "\n")
13
- end
14
- when "single"
15
- TSV.traverse file, :type => :array, :into => :stream do |line|
16
- next if line =~ /^#/
17
- line.split("\t")[1]
18
- end
19
- when "flat"
20
- TSV.traverse file, :type => :array, :into => :stream do |line|
21
- next if line =~ /^#/
22
- line.split("\t")[1..-1] * "\n"
23
- end
24
- when 'list'
25
- TSV.traverse file, :type => :array, :into => :stream do |line|
26
- next if line =~ /^#/
27
- line.split("\t")[pos]
28
- end
29
- end
30
- end
31
-
32
- def self.guess_id(identifier_file, values, options = {})
33
- field_matches = TSV.field_match_counts(identifier_file, values, options)
34
- field_matches.sort_by{|field, count| count.to_i}.last
35
- end
36
-
37
- def self.field_match_counts(file, values, options = {})
38
- options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
39
- persist_options = Misc.pull_keys options, :persist
40
-
41
- filename = TSV === file ? file.filename : file
42
- path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
43
- tsv = TSV === file ? file : TSV.open(file, options)
44
-
45
- text = ""
46
- fields = nil
47
- tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
48
- names.zip(fields).each do |list, format|
49
- list = [list] unless Array === list
50
- list.delete_if do |name| name.empty? end
51
- next if list.empty?
52
- text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
53
- end
54
- text << [gene, tsv.key_field] * "\t" << "\n"
55
- end
56
- text
57
- end
58
-
59
- TmpFile.with_file(values.uniq * "\n", false) do |value_file|
60
- cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
61
- begin
62
- TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
63
- rescue
64
- Log.exception $!
65
- TSV.setup({}, :type => :single, :cast => :to_i)
66
- end
67
- end
68
- end
69
-
70
- def self.get_filename(file)
71
- case
72
- when (defined? Step and Step === file)
73
- file.path
74
- when Path === file
75
- file
76
- when (String === file and (Open.exists? file or Open.remote? file))
77
- file
78
- when String === file
79
- "String-#{Misc.digest file}"
80
- when file.respond_to?(:filename)
81
- file.filename
82
- when file.respond_to?(:gets)
83
- nil
84
- else
85
- raise "Cannot get filename from: #{file.inspect}"
86
- end
87
- end
88
-
89
- def self.abort_stream(file, exception = nil)
90
- return if file.nil?
91
- if defined? Step and Step === file
92
- if exception
93
- file.exception exception
94
- else
95
- if not (file.aborted? or file.done?)
96
- file.abort
97
- end
98
- end
99
- elsif Hash === file or Array === file
100
- return
101
- else
102
- stream = get_stream(file)
103
- stream.abort(exception) if stream.respond_to? :abort
104
- AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
105
- end
106
- end
107
-
108
- def self.get_stream(file, open_options = {})
109
- case file
110
- when Zlib::GzipReader
111
- file
112
- when (defined? Bgzf and Bgzf)
113
- file
114
- when TSV
115
- file.dumper_stream
116
- when TSV::Dumper
117
- file.stream
118
- when TSV::Parser
119
- file.stream
120
- when Path
121
- file.open(open_options)
122
- when (defined? Tempfile and Tempfile)
123
- begin
124
- pos = file.pos
125
- file.rewind if file.respond_to?(:rewind) and pos != 0
126
- rescue Exception
127
- end
128
- file
129
- when IO, StringIO, File
130
- begin
131
- pos = file.pos
132
- file.rewind if file.respond_to?(:rewind) and pos != 0
133
- rescue
134
- end
135
- file
136
- when String
137
- if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
138
- Open.open(file, open_options)
139
- else
140
- StringIO.new file
141
- end
142
- when (defined? Step and Step)
143
- if file.respond_to?(:base_url)
144
- if file.result and IO === file.result
145
- file.result
146
- else
147
- file.join
148
- get_stream(file.path, open_options.merge(:nocache => true))
149
- end
150
- else
151
- file.grace
152
-
153
- stream = file.get_stream
154
- if stream && ! stream.closed?
155
- stream
156
- else
157
- file.join
158
- raise "Aborted stream from Step #{file.path}" if file.aborted?
159
- raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
160
- get_stream(file.path, open_options)
161
- end
162
- end
163
- when Array
164
- Misc.open_pipe do |sin|
165
- file.each do |l|
166
- sin.puts l
167
- end
168
- end
169
- when Set
170
- get_stream(file.to_a, open_options)
171
- when Enumerable
172
- file
173
- else
174
- raise "Cannot get stream from: #{file.inspect}"
175
- end
176
- end
177
-
178
- def self.identify_field(key_field, fields, field)
179
- case field
180
- when nil
181
- :key
182
- when Symbol
183
- field == :key ? field : identify_field(key_field, fields, field.to_s)
184
- when Integer
185
- field
186
- when (fields.nil? and String)
187
- raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
188
- identify_field(key_field, fields, field.to_i)
189
- when String
190
- return :key if key_field == field
191
- pos = fields.index field
192
- return pos if pos
193
- return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
194
- if fields.select{|f| f.include?("(") }.any?
195
- simplify_fields = fields.collect do |f|
196
- if m = f.match(/(.*)\s+\(.*\)/)
197
- m[1]
198
- else
199
- f
200
- end
201
- end
202
- return identify_field(key_field, simplify_fields, field)
203
- end
204
- raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
205
- else
206
- raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
207
- end
208
- end
209
-
210
-
211
-
212
- def self.header_lines(key_field, fields, entry_hash = nil)
213
- if Hash === entry_hash
214
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
215
- preamble = entry_hash[:preamble]
216
- header_hash = entry_hash[:header_hash]
217
- end
218
-
219
- header_hash = "#" if header_hash.nil?
220
-
221
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
222
-
223
- str = ""
224
- str << preamble.strip << "\n" if preamble and not preamble.empty?
225
- if fields
226
- if fields.empty?
227
- str << header_hash << (key_field || "ID").to_s << "\n"
228
- else
229
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
230
- end
231
- end
232
-
233
- str
234
- end
235
-
236
- def identify_field(field)
237
- TSV.identify_field(key_field, fields, field)
238
- end
239
-
240
- def rename_field(field, new)
241
- self.fields = self.fields.collect{|f| f == field ? new : f }
242
- self
243
- end
244
-
245
- def unzip_replicates
246
- raise "Can only unzip replicates in :double TSVs" unless type == :double
247
-
248
- new = {}
249
- self.with_unnamed do
250
- through do |k,vs|
251
- Misc.zip_fields(vs).each_with_index do |v,i|
252
- new[k + "(#{i})"] = v
253
- end
254
- end
255
- end
256
-
257
- self.annotate(new)
258
- new.type = :list
259
-
260
- new
261
- end
262
-
263
- def to_list(&block)
264
- new = {}
265
- case type
266
- when :double
267
- if block_given?
268
- through do |k,v|
269
- if block.arity == 1
270
- new[k] = v.collect{|e| yield e}
271
- else
272
- new[k] = yield k, v
273
- end
274
- end
275
- else
276
- through do |k,v|
277
- new[k] = v.collect{|e| e.first}
278
- end
279
- end
280
- when :flat
281
- through do |k,v|
282
- new[k] = [v.first]
283
- end
284
- when :single
285
- through do |k,v|
286
- new[k] = [v]
287
- end
288
- when :list
289
- return self
290
- end
291
- self.annotate(new)
292
- new.type = :list
293
- new
294
- end
295
-
296
- def to_double
297
- new = {}
298
- case type
299
- when :double
300
- return self
301
- when :flat
302
- through do |k,v|
303
- new[k] = v.nil? ? [] : [v]
304
- end
305
- when :single
306
- through do |k,v|
307
- new[k] = v.nil? ? [[]] : [[v]]
308
- end
309
- when :list
310
- if block_given?
311
- through do |k,v|
312
- if v.nil?
313
- new[k] = nil
314
- else
315
- new[k] = v.collect{|e| yield e}
316
- end
317
- end
318
- else
319
- through do |k,v|
320
- if v.nil?
321
- new[k] = nil
322
- else
323
- new[k] = v.collect{|e| [e]}
324
- end
325
- end
326
- end
327
- end
328
- self.annotate(new)
329
- new.type = :double
330
- new
331
- end
332
-
333
- def to_flat(field = nil)
334
- new = {}
335
- case type
336
- when :double
337
- if field.nil?
338
- through do |k,v| new[k] = v.first end
339
- elsif field == :all
340
- through do |k,v| new[k] = v.flatten.compact end
341
- else
342
- pos = identify_field field
343
- through do |k,v| new[k] = v[pos] end
344
- end
345
- when :flat
346
- return self
347
- when :single
348
- through do |k,v|
349
- new[k] = [v]
350
- end
351
- when :list
352
- through do |k,v|
353
- new[k] = [v.first]
354
- end
355
- end
356
- self.annotate(new)
357
- if new.fields
358
- case field
359
- when nil
360
- new.fields = new.fields[0..0]
361
- when :all
362
- new.fields = [new.fields * "+"]
363
- else
364
- new.fields = [field]
365
- end
366
- end
367
- new.type = :flat
368
- new
369
- end
370
-
371
- def to_single
372
- new = {}
373
-
374
- if block_given?
375
- through do |k,v|
376
- new[k] = yield v
377
- end
378
- else
379
- case type
380
- when :double
381
- through do |k,v|
382
- new[k] = v.first.first
383
- end
384
- when :flat
385
- through do |k,v|
386
- new[k] = v.first
387
- end
388
- when :single
389
- return self
390
- when :list
391
- through do |k,v|
392
- new[k] = v.nil? ? nil : v.first
393
- end
394
- end
395
- end
396
-
397
- self.annotate(new)
398
- new.type = :single
399
- new.fields = [new.fields.first] if new.fields.length > 1
400
- new
401
- end
402
-
403
-
404
- def to_onehot(boolean = false)
405
- all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
406
- index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
407
- index.cast = :to_i unless boolean
408
- through do |key,values|
409
- v = all_values.collect{|_v| values.include?(_v)}
410
- v = v.collect{|_v| _v ? 1 : 0 } unless boolean
411
- index[key] = v
412
- end
413
- index
414
- end
415
-
416
- def merge(other)
417
- self.annotate(super(other))
418
- end
419
- end
420
-
1
+ #require 'rbbt/resource/path'
2
+ #module TSV
3
+ #
4
+ # def self.stream_column(file, column)
5
+ # header = TSV.parse_header(file)
6
+ # pos = header.fields.index(column) + 1
7
+ # sep2 = header.options[:sep2] || "|"
8
+ # case header.type.to_s
9
+ # when nil, "double"
10
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
11
+ # next if line =~ /^#/
12
+ # line.split("\t")[pos].gsub(sep2, "\n")
13
+ # end
14
+ # when "single"
15
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
16
+ # next if line =~ /^#/
17
+ # line.split("\t")[1]
18
+ # end
19
+ # when "flat"
20
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
21
+ # next if line =~ /^#/
22
+ # line.split("\t")[1..-1] * "\n"
23
+ # end
24
+ # when 'list'
25
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
26
+ # next if line =~ /^#/
27
+ # line.split("\t")[pos]
28
+ # end
29
+ # end
30
+ # end
31
+ #
32
+ # def self.guess_id(identifier_file, values, options = {})
33
+ # field_matches = TSV.field_match_counts(identifier_file, values, options)
34
+ # field_matches.sort_by{|field, count| count.to_i}.last
35
+ # end
36
+ #
37
+ # def self.field_match_counts(file, values, options = {})
38
+ # options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
39
+ # persist_options = IndiferentHash.pull_keys options, :persist
40
+ #
41
+ # filename = TSV === file ? file.filename : file
42
+ # path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
43
+ # tsv = TSV === file ? file : TSV.open(file, options)
44
+ #
45
+ # text = ""
46
+ # fields = nil
47
+ # tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
48
+ # names.zip(fields).each do |list, format|
49
+ # list = [list] unless Array === list
50
+ # list.delete_if do |name| name.empty? end
51
+ # next if list.empty?
52
+ # text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
53
+ # end
54
+ # text << [gene, tsv.key_field] * "\t" << "\n"
55
+ # end
56
+ # text
57
+ # end
58
+ #
59
+ # TmpFile.with_file(values.uniq * "\n", false) do |value_file|
60
+ # cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
61
+ # begin
62
+ # TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
63
+ # rescue
64
+ # Log.exception $!
65
+ # TSV.setup({}, :type => :single, :cast => :to_i)
66
+ # end
67
+ # end
68
+ # end
69
+ #
70
+ # def self.get_filename(file)
71
+ # case
72
+ # when (defined? Step and Step === file)
73
+ # file.path
74
+ # when Path === file
75
+ # file
76
+ # when (String === file and (Open.exists? file or Open.remote? file))
77
+ # file
78
+ # when String === file
79
+ # "String-#{Misc.digest file}"
80
+ # when file.respond_to?(:filename)
81
+ # file.filename
82
+ # when file.respond_to?(:gets)
83
+ # nil
84
+ # else
85
+ # raise "Cannot get filename from: #{file.inspect}"
86
+ # end
87
+ # end
88
+ #
89
+ # def self.abort_stream(file, exception = nil)
90
+ # return if file.nil?
91
+ # if defined? Step and Step === file
92
+ # if exception
93
+ # file.exception exception
94
+ # else
95
+ # if not (file.aborted? or file.done?)
96
+ # file.abort
97
+ # end
98
+ # end
99
+ # elsif Hash === file or Array === file
100
+ # return
101
+ # else
102
+ # stream = get_stream(file)
103
+ # stream.abort(exception) if stream.respond_to? :abort
104
+ # AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
105
+ # end
106
+ # end
107
+ #
108
+ # def self.get_stream(file, open_options = {})
109
+ # case file
110
+ # when Zlib::GzipReader
111
+ # file
112
+ # when (defined? Bgzf and Bgzf)
113
+ # file
114
+ # when TSV
115
+ # file.dumper_stream
116
+ # when TSV::Dumper
117
+ # file.stream
118
+ # when TSV::Parser
119
+ # file.stream
120
+ # when Path
121
+ # file.open(open_options)
122
+ # when (defined? Tempfile and Tempfile)
123
+ # begin
124
+ # pos = file.pos
125
+ # file.rewind if file.respond_to?(:rewind) and pos != 0
126
+ # rescue Exception
127
+ # end
128
+ # file
129
+ # when IO, StringIO, File
130
+ # begin
131
+ # pos = file.pos
132
+ # file.rewind if file.respond_to?(:rewind) and pos != 0
133
+ # rescue
134
+ # end
135
+ # file
136
+ # when String
137
+ # if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
138
+ # Open.open(file, open_options)
139
+ # else
140
+ # StringIO.new file
141
+ # end
142
+ # when (defined? Step and Step)
143
+ # if file.respond_to?(:base_url)
144
+ # if file.result and IO === file.result
145
+ # file.result
146
+ # else
147
+ # file.join
148
+ # get_stream(file.path, open_options.merge(:nocache => true))
149
+ # end
150
+ # else
151
+ # file.grace
152
+ #
153
+ # stream = file.get_stream
154
+ # if stream && ! stream.closed?
155
+ # stream
156
+ # else
157
+ # file.join
158
+ # raise "Aborted stream from Step #{file.path}" if file.aborted?
159
+ # raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
160
+ # get_stream(file.path, open_options)
161
+ # end
162
+ # end
163
+ # when Array
164
+ # Misc.open_pipe do |sin|
165
+ # file.each do |l|
166
+ # sin.puts l
167
+ # end
168
+ # end
169
+ # when Set
170
+ # get_stream(file.to_a, open_options)
171
+ # when Enumerable
172
+ # file
173
+ # else
174
+ # raise "Cannot get stream from: #{file.inspect}"
175
+ # end
176
+ # end
177
+ #
178
+ # def self.identify_field(key_field, fields, field)
179
+ # case field
180
+ # when nil
181
+ # :key
182
+ # when Symbol
183
+ # field == :key ? field : identify_field(key_field, fields, field.to_s)
184
+ # when Integer
185
+ # field
186
+ # when (fields.nil? and String)
187
+ # raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
188
+ # identify_field(key_field, fields, field.to_i)
189
+ # when String
190
+ # return :key if key_field == field
191
+ # pos = fields.index field
192
+ # return pos if pos
193
+ # return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
194
+ # if fields.select{|f| f.include?("(") }.any?
195
+ # simplify_fields = fields.collect do |f|
196
+ # if m = f.match(/(.*)\s+\(.*\)/)
197
+ # m[1]
198
+ # else
199
+ # f
200
+ # end
201
+ # end
202
+ # return identify_field(key_field, simplify_fields, field)
203
+ # end
204
+ # raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
205
+ # else
206
+ # raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
207
+ # end
208
+ # end
209
+ #
210
+ #
211
+ #
212
+ # def self.header_lines(key_field, fields, entry_hash = nil)
213
+ # if Hash === entry_hash
214
+ # sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
215
+ # preamble = entry_hash[:preamble]
216
+ # header_hash = entry_hash[:header_hash]
217
+ # end
218
+ #
219
+ # header_hash = "#" if header_hash.nil?
220
+ #
221
+ # preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
222
+ #
223
+ # str = ""
224
+ # str << preamble.strip << "\n" if preamble and not preamble.empty?
225
+ # if fields
226
+ # if fields.empty?
227
+ # str << header_hash << (key_field || "ID").to_s << "\n"
228
+ # else
229
+ # str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
230
+ # end
231
+ # end
232
+ #
233
+ # str
234
+ # end
235
+ #
236
+ # def identify_field(field)
237
+ # TSV.identify_field(key_field, fields, field)
238
+ # end
239
+ #
240
+ # def rename_field(field, new)
241
+ # self.fields = self.fields.collect{|f| f == field ? new : f }
242
+ # self
243
+ # end
244
+ #
245
+ # def unzip_replicates
246
+ # raise "Can only unzip replicates in :double TSVs" unless type == :double
247
+ #
248
+ # new = {}
249
+ # self.with_unnamed do
250
+ # through do |k,vs|
251
+ # Misc.zip_fields(vs).each_with_index do |v,i|
252
+ # new[k + "(#{i})"] = v
253
+ # end
254
+ # end
255
+ # end
256
+ #
257
+ # self.annotate(new)
258
+ # new.type = :list
259
+ #
260
+ # new
261
+ # end
262
+ #
263
+ # def to_list(&block)
264
+ # new = {}
265
+ # case type
266
+ # when :double
267
+ # if block_given?
268
+ # through do |k,v|
269
+ # if block.arity == 1
270
+ # new[k] = v.collect{|e| yield e}
271
+ # else
272
+ # new[k] = yield k, v
273
+ # end
274
+ # end
275
+ # else
276
+ # through do |k,v|
277
+ # new[k] = v.collect{|e| e.first}
278
+ # end
279
+ # end
280
+ # when :flat
281
+ # through do |k,v|
282
+ # new[k] = [v.first]
283
+ # end
284
+ # when :single
285
+ # through do |k,v|
286
+ # new[k] = [v]
287
+ # end
288
+ # when :list
289
+ # return self
290
+ # end
291
+ # self.annotate(new)
292
+ # new.type = :list
293
+ # new
294
+ # end
295
+ #
296
+ # def to_double
297
+ # new = {}
298
+ # case type
299
+ # when :double
300
+ # return self
301
+ # when :flat
302
+ # through do |k,v|
303
+ # new[k] = v.nil? ? [] : [v]
304
+ # end
305
+ # when :single
306
+ # through do |k,v|
307
+ # new[k] = v.nil? ? [[]] : [[v]]
308
+ # end
309
+ # when :list
310
+ # if block_given?
311
+ # through do |k,v|
312
+ # if v.nil?
313
+ # new[k] = nil
314
+ # else
315
+ # new[k] = v.collect{|e| yield e}
316
+ # end
317
+ # end
318
+ # else
319
+ # through do |k,v|
320
+ # if v.nil?
321
+ # new[k] = nil
322
+ # else
323
+ # new[k] = v.collect{|e| [e]}
324
+ # end
325
+ # end
326
+ # end
327
+ # end
328
+ # self.annotate(new)
329
+ # new.type = :double
330
+ # new
331
+ # end
332
+ #
333
+ # def to_flat(field = nil)
334
+ # new = {}
335
+ # case type
336
+ # when :double
337
+ # if field.nil?
338
+ # through do |k,v| new[k] = v.first end
339
+ # elsif field == :all
340
+ # through do |k,v| new[k] = v.flatten.compact end
341
+ # else
342
+ # pos = identify_field field
343
+ # through do |k,v| new[k] = v[pos] end
344
+ # end
345
+ # when :flat
346
+ # return self
347
+ # when :single
348
+ # through do |k,v|
349
+ # new[k] = [v]
350
+ # end
351
+ # when :list
352
+ # through do |k,v|
353
+ # new[k] = [v.first]
354
+ # end
355
+ # end
356
+ # self.annotate(new)
357
+ # if new.fields
358
+ # case field
359
+ # when nil
360
+ # new.fields = new.fields[0..0]
361
+ # when :all
362
+ # new.fields = [new.fields * "+"]
363
+ # else
364
+ # new.fields = [field]
365
+ # end
366
+ # end
367
+ # new.type = :flat
368
+ # new
369
+ # end
370
+ #
371
+ # def to_single
372
+ # new = {}
373
+ #
374
+ # if block_given?
375
+ # through do |k,v|
376
+ # new[k] = yield v
377
+ # end
378
+ # else
379
+ # case type
380
+ # when :double
381
+ # through do |k,v|
382
+ # new[k] = v.first.first
383
+ # end
384
+ # when :flat
385
+ # through do |k,v|
386
+ # new[k] = v.first
387
+ # end
388
+ # when :single
389
+ # return self
390
+ # when :list
391
+ # through do |k,v|
392
+ # new[k] = v.nil? ? nil : v.first
393
+ # end
394
+ # end
395
+ # end
396
+ #
397
+ # self.annotate(new)
398
+ # new.type = :single
399
+ # new.fields = [new.fields.first] if new.fields.length > 1
400
+ # new
401
+ # end
402
+ #
403
+ #
404
+ # def to_onehot(boolean = false)
405
+ # all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
406
+ # index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
407
+ # index.cast = :to_i unless boolean
408
+ # through do |key,values|
409
+ # v = all_values.collect{|_v| values.include?(_v)}
410
+ # v = v.collect{|_v| _v ? 1 : 0 } unless boolean
411
+ # index[key] = v
412
+ # end
413
+ # index
414
+ # end
415
+ #
416
+ # def merge(other)
417
+ # self.annotate(super(other))
418
+ # end
419
+ #end
420
+ #