rbbt-util 5.44.1 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +2 -2
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  33. data/lib/rbbt/persist/tsv.rb +187 -185
  34. data/lib/rbbt/persist.rb +556 -551
  35. data/lib/rbbt/refactor.rb +20 -0
  36. data/lib/rbbt/resource/path/refactor.rb +178 -0
  37. data/lib/rbbt/resource/path.rb +317 -497
  38. data/lib/rbbt/resource/util.rb +0 -48
  39. data/lib/rbbt/resource.rb +3 -390
  40. data/lib/rbbt/tsv/accessor.rb +2 -838
  41. data/lib/rbbt/tsv/attach.rb +303 -299
  42. data/lib/rbbt/tsv/change_id.rb +244 -245
  43. data/lib/rbbt/tsv/csv.rb +87 -85
  44. data/lib/rbbt/tsv/dumper.rb +2 -100
  45. data/lib/rbbt/tsv/excel.rb +26 -24
  46. data/lib/rbbt/tsv/field_index.rb +4 -1
  47. data/lib/rbbt/tsv/filter.rb +3 -2
  48. data/lib/rbbt/tsv/index.rb +2 -284
  49. data/lib/rbbt/tsv/manipulate.rb +750 -747
  50. data/lib/rbbt/tsv/marshal.rb +3 -3
  51. data/lib/rbbt/tsv/matrix.rb +2 -2
  52. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  53. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  54. data/lib/rbbt/tsv/parser.rb +678 -678
  55. data/lib/rbbt/tsv/refactor.rb +195 -0
  56. data/lib/rbbt/tsv/stream.rb +253 -251
  57. data/lib/rbbt/tsv/util.rb +420 -420
  58. data/lib/rbbt/tsv.rb +210 -208
  59. data/lib/rbbt/util/R/eval.rb +4 -4
  60. data/lib/rbbt/util/R/plot.rb +62 -166
  61. data/lib/rbbt/util/R.rb +21 -18
  62. data/lib/rbbt/util/cmd.rb +2 -318
  63. data/lib/rbbt/util/color.rb +269 -269
  64. data/lib/rbbt/util/colorize.rb +89 -89
  65. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  66. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  67. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  68. data/lib/rbbt/util/config.rb +169 -167
  69. data/lib/rbbt/util/iruby.rb +20 -0
  70. data/lib/rbbt/util/log/progress/report.rb +241 -241
  71. data/lib/rbbt/util/log/progress/util.rb +99 -99
  72. data/lib/rbbt/util/log/progress.rb +102 -102
  73. data/lib/rbbt/util/log/refactor.rb +49 -0
  74. data/lib/rbbt/util/log.rb +486 -532
  75. data/lib/rbbt/util/migrate.rb +1 -1
  76. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  77. data/lib/rbbt/util/misc/development.rb +12 -11
  78. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  79. data/lib/rbbt/util/misc/format.rb +2 -230
  80. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  81. data/lib/rbbt/util/misc/inspect.rb +2 -476
  82. data/lib/rbbt/util/misc/lock.rb +109 -106
  83. data/lib/rbbt/util/misc/omics.rb +9 -1
  84. data/lib/rbbt/util/misc/pipes.rb +765 -793
  85. data/lib/rbbt/util/misc/refactor.rb +20 -0
  86. data/lib/rbbt/util/misc/ssw.rb +27 -17
  87. data/lib/rbbt/util/misc/system.rb +0 -15
  88. data/lib/rbbt/util/misc.rb +39 -20
  89. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  90. data/lib/rbbt/util/named_array.rb +3 -220
  91. data/lib/rbbt/util/open/refactor.rb +7 -0
  92. data/lib/rbbt/util/open.rb +3 -857
  93. data/lib/rbbt/util/procpath.rb +6 -6
  94. data/lib/rbbt/util/python/paths.rb +27 -0
  95. data/lib/rbbt/util/python/run.rb +115 -0
  96. data/lib/rbbt/util/python/script.rb +110 -0
  97. data/lib/rbbt/util/python/util.rb +3 -3
  98. data/lib/rbbt/util/python.rb +22 -81
  99. data/lib/rbbt/util/semaphore.rb +152 -148
  100. data/lib/rbbt/util/simpleopt.rb +9 -8
  101. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  102. data/lib/rbbt/util/ssh.rb +122 -118
  103. data/lib/rbbt/util/tar.rb +117 -115
  104. data/lib/rbbt/util/tmpfile.rb +69 -67
  105. data/lib/rbbt/util/version.rb +2 -0
  106. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  107. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  108. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  109. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  110. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  111. data/lib/rbbt/workflow/refactor.rb +153 -0
  112. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  113. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  115. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  116. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  117. data/lib/rbbt/workflow/step/run.rb +766 -766
  118. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  119. data/lib/rbbt/workflow/step.rb +2 -362
  120. data/lib/rbbt/workflow/task.rb +118 -118
  121. data/lib/rbbt/workflow/usage.rb +289 -287
  122. data/lib/rbbt/workflow/util/archive.rb +6 -5
  123. data/lib/rbbt/workflow/util/data.rb +1 -1
  124. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  125. data/lib/rbbt/workflow/util/trace.rb +79 -44
  126. data/lib/rbbt/workflow.rb +4 -882
  127. data/lib/rbbt-util.rb +21 -13
  128. data/lib/rbbt.rb +16 -3
  129. data/python/rbbt/__init__.py +19 -1
  130. data/share/Rlib/plot.R +37 -37
  131. data/share/Rlib/svg.R +22 -5
  132. data/share/install/software/lib/install_helpers +1 -1
  133. data/share/rbbt_commands/hpc/list +2 -3
  134. data/share/rbbt_commands/hpc/orchestrate +4 -4
  135. data/share/rbbt_commands/hpc/tail +2 -0
  136. data/share/rbbt_commands/hpc/task +10 -7
  137. data/share/rbbt_commands/lsf/list +2 -3
  138. data/share/rbbt_commands/lsf/orchestrate +4 -4
  139. data/share/rbbt_commands/lsf/tail +2 -0
  140. data/share/rbbt_commands/lsf/task +10 -7
  141. data/share/rbbt_commands/migrate +1 -1
  142. data/share/rbbt_commands/pbs/list +2 -3
  143. data/share/rbbt_commands/pbs/orchestrate +4 -4
  144. data/share/rbbt_commands/pbs/tail +2 -0
  145. data/share/rbbt_commands/pbs/task +10 -7
  146. data/share/rbbt_commands/resource/produce +8 -1
  147. data/share/rbbt_commands/slurm/list +2 -3
  148. data/share/rbbt_commands/slurm/orchestrate +4 -4
  149. data/share/rbbt_commands/slurm/tail +2 -0
  150. data/share/rbbt_commands/slurm/task +10 -7
  151. data/share/rbbt_commands/system/clean +5 -5
  152. data/share/rbbt_commands/system/status +5 -5
  153. data/share/rbbt_commands/tsv/get +2 -3
  154. data/share/rbbt_commands/tsv/info +10 -13
  155. data/share/rbbt_commands/tsv/keys +18 -14
  156. data/share/rbbt_commands/tsv/slice +2 -2
  157. data/share/rbbt_commands/tsv/transpose +6 -2
  158. data/share/rbbt_commands/workflow/info +20 -24
  159. data/share/rbbt_commands/workflow/list +1 -1
  160. data/share/rbbt_commands/workflow/prov +20 -13
  161. data/share/rbbt_commands/workflow/server +11 -1
  162. data/share/rbbt_commands/workflow/task +76 -71
  163. data/share/rbbt_commands/workflow/write_info +26 -9
  164. data/share/software/opt/ssw/ssw.c +861 -0
  165. data/share/software/opt/ssw/ssw.h +130 -0
  166. data/share/workflow_config.ru +3 -3
  167. metadata +40 -2
data/lib/rbbt/tsv/util.rb CHANGED
@@ -1,420 +1,420 @@
1
- require 'rbbt/resource/path'
2
- module TSV
3
-
4
- def self.stream_column(file, column)
5
- header = TSV.parse_header(file)
6
- pos = header.fields.index(column) + 1
7
- sep2 = header.options[:sep2] || "|"
8
- case header.type.to_s
9
- when nil, "double"
10
- TSV.traverse file, :type => :array, :into => :stream do |line|
11
- next if line =~ /^#/
12
- line.split("\t")[pos].gsub(sep2, "\n")
13
- end
14
- when "single"
15
- TSV.traverse file, :type => :array, :into => :stream do |line|
16
- next if line =~ /^#/
17
- line.split("\t")[1]
18
- end
19
- when "flat"
20
- TSV.traverse file, :type => :array, :into => :stream do |line|
21
- next if line =~ /^#/
22
- line.split("\t")[1..-1] * "\n"
23
- end
24
- when 'list'
25
- TSV.traverse file, :type => :array, :into => :stream do |line|
26
- next if line =~ /^#/
27
- line.split("\t")[pos]
28
- end
29
- end
30
- end
31
-
32
- def self.guess_id(identifier_file, values, options = {})
33
- field_matches = TSV.field_match_counts(identifier_file, values, options)
34
- field_matches.sort_by{|field, count| count.to_i}.last
35
- end
36
-
37
- def self.field_match_counts(file, values, options = {})
38
- options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
39
- persist_options = Misc.pull_keys options, :persist
40
-
41
- filename = TSV === file ? file.filename : file
42
- path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
43
- tsv = TSV === file ? file : TSV.open(file, options)
44
-
45
- text = ""
46
- fields = nil
47
- tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
48
- names.zip(fields).each do |list, format|
49
- list = [list] unless Array === list
50
- list.delete_if do |name| name.empty? end
51
- next if list.empty?
52
- text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
53
- end
54
- text << [gene, tsv.key_field] * "\t" << "\n"
55
- end
56
- text
57
- end
58
-
59
- TmpFile.with_file(values.uniq * "\n", false) do |value_file|
60
- cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
61
- begin
62
- TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
63
- rescue
64
- Log.exception $!
65
- TSV.setup({}, :type => :single, :cast => :to_i)
66
- end
67
- end
68
- end
69
-
70
- def self.get_filename(file)
71
- case
72
- when (defined? Step and Step === file)
73
- file.path
74
- when Path === file
75
- file
76
- when (String === file and (Open.exists? file or Open.remote? file))
77
- file
78
- when String === file
79
- "String-#{Misc.digest file}"
80
- when file.respond_to?(:filename)
81
- file.filename
82
- when file.respond_to?(:gets)
83
- nil
84
- else
85
- raise "Cannot get filename from: #{file.inspect}"
86
- end
87
- end
88
-
89
- def self.abort_stream(file, exception = nil)
90
- return if file.nil?
91
- if defined? Step and Step === file
92
- if exception
93
- file.exception exception
94
- else
95
- if not (file.aborted? or file.done?)
96
- file.abort
97
- end
98
- end
99
- elsif Hash === file or Array === file
100
- return
101
- else
102
- stream = get_stream(file)
103
- stream.abort(exception) if stream.respond_to? :abort
104
- AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
105
- end
106
- end
107
-
108
- def self.get_stream(file, open_options = {})
109
- case file
110
- when Zlib::GzipReader
111
- file
112
- when (defined? Bgzf and Bgzf)
113
- file
114
- when TSV
115
- file.dumper_stream
116
- when TSV::Dumper
117
- file.stream
118
- when TSV::Parser
119
- file.stream
120
- when Path
121
- file.open(open_options)
122
- when (defined? Tempfile and Tempfile)
123
- begin
124
- pos = file.pos
125
- file.rewind if file.respond_to?(:rewind) and pos != 0
126
- rescue Exception
127
- end
128
- file
129
- when IO, StringIO, File
130
- begin
131
- pos = file.pos
132
- file.rewind if file.respond_to?(:rewind) and pos != 0
133
- rescue
134
- end
135
- file
136
- when String
137
- if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
138
- Open.open(file, open_options)
139
- else
140
- StringIO.new file
141
- end
142
- when (defined? Step and Step)
143
- if file.respond_to?(:base_url)
144
- if file.result and IO === file.result
145
- file.result
146
- else
147
- file.join
148
- get_stream(file.path, open_options.merge(:nocache => true))
149
- end
150
- else
151
- file.grace
152
-
153
- stream = file.get_stream
154
- if stream && ! stream.closed?
155
- stream
156
- else
157
- file.join
158
- raise "Aborted stream from Step #{file.path}" if file.aborted?
159
- raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
160
- get_stream(file.path, open_options)
161
- end
162
- end
163
- when Array
164
- Misc.open_pipe do |sin|
165
- file.each do |l|
166
- sin.puts l
167
- end
168
- end
169
- when Set
170
- get_stream(file.to_a, open_options)
171
- when Enumerable
172
- file
173
- else
174
- raise "Cannot get stream from: #{file.inspect}"
175
- end
176
- end
177
-
178
- def self.identify_field(key_field, fields, field)
179
- case field
180
- when nil
181
- :key
182
- when Symbol
183
- field == :key ? field : identify_field(key_field, fields, field.to_s)
184
- when Integer
185
- field
186
- when (fields.nil? and String)
187
- raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
188
- identify_field(key_field, fields, field.to_i)
189
- when String
190
- return :key if key_field == field
191
- pos = fields.index field
192
- return pos if pos
193
- return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
194
- if fields.select{|f| f.include?("(") }.any?
195
- simplify_fields = fields.collect do |f|
196
- if m = f.match(/(.*)\s+\(.*\)/)
197
- m[1]
198
- else
199
- f
200
- end
201
- end
202
- return identify_field(key_field, simplify_fields, field)
203
- end
204
- raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
205
- else
206
- raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
207
- end
208
- end
209
-
210
-
211
-
212
- def self.header_lines(key_field, fields, entry_hash = nil)
213
- if Hash === entry_hash
214
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
215
- preamble = entry_hash[:preamble]
216
- header_hash = entry_hash[:header_hash]
217
- end
218
-
219
- header_hash = "#" if header_hash.nil?
220
-
221
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
222
-
223
- str = ""
224
- str << preamble.strip << "\n" if preamble and not preamble.empty?
225
- if fields
226
- if fields.empty?
227
- str << header_hash << (key_field || "ID").to_s << "\n"
228
- else
229
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
230
- end
231
- end
232
-
233
- str
234
- end
235
-
236
- def identify_field(field)
237
- TSV.identify_field(key_field, fields, field)
238
- end
239
-
240
- def rename_field(field, new)
241
- self.fields = self.fields.collect{|f| f == field ? new : f }
242
- self
243
- end
244
-
245
- def unzip_replicates
246
- raise "Can only unzip replicates in :double TSVs" unless type == :double
247
-
248
- new = {}
249
- self.with_unnamed do
250
- through do |k,vs|
251
- Misc.zip_fields(vs).each_with_index do |v,i|
252
- new[k + "(#{i})"] = v
253
- end
254
- end
255
- end
256
-
257
- self.annotate(new)
258
- new.type = :list
259
-
260
- new
261
- end
262
-
263
- def to_list(&block)
264
- new = {}
265
- case type
266
- when :double
267
- if block_given?
268
- through do |k,v|
269
- if block.arity == 1
270
- new[k] = v.collect{|e| yield e}
271
- else
272
- new[k] = yield k, v
273
- end
274
- end
275
- else
276
- through do |k,v|
277
- new[k] = v.collect{|e| e.first}
278
- end
279
- end
280
- when :flat
281
- through do |k,v|
282
- new[k] = [v.first]
283
- end
284
- when :single
285
- through do |k,v|
286
- new[k] = [v]
287
- end
288
- when :list
289
- return self
290
- end
291
- self.annotate(new)
292
- new.type = :list
293
- new
294
- end
295
-
296
- def to_double
297
- new = {}
298
- case type
299
- when :double
300
- return self
301
- when :flat
302
- through do |k,v|
303
- new[k] = v.nil? ? [] : [v]
304
- end
305
- when :single
306
- through do |k,v|
307
- new[k] = v.nil? ? [[]] : [[v]]
308
- end
309
- when :list
310
- if block_given?
311
- through do |k,v|
312
- if v.nil?
313
- new[k] = nil
314
- else
315
- new[k] = v.collect{|e| yield e}
316
- end
317
- end
318
- else
319
- through do |k,v|
320
- if v.nil?
321
- new[k] = nil
322
- else
323
- new[k] = v.collect{|e| [e]}
324
- end
325
- end
326
- end
327
- end
328
- self.annotate(new)
329
- new.type = :double
330
- new
331
- end
332
-
333
- def to_flat(field = nil)
334
- new = {}
335
- case type
336
- when :double
337
- if field.nil?
338
- through do |k,v| new[k] = v.first end
339
- elsif field == :all
340
- through do |k,v| new[k] = v.flatten.compact end
341
- else
342
- pos = identify_field field
343
- through do |k,v| new[k] = v[pos] end
344
- end
345
- when :flat
346
- return self
347
- when :single
348
- through do |k,v|
349
- new[k] = [v]
350
- end
351
- when :list
352
- through do |k,v|
353
- new[k] = [v.first]
354
- end
355
- end
356
- self.annotate(new)
357
- if new.fields
358
- case field
359
- when nil
360
- new.fields = new.fields[0..0]
361
- when :all
362
- new.fields = [new.fields * "+"]
363
- else
364
- new.fields = [field]
365
- end
366
- end
367
- new.type = :flat
368
- new
369
- end
370
-
371
- def to_single
372
- new = {}
373
-
374
- if block_given?
375
- through do |k,v|
376
- new[k] = yield v
377
- end
378
- else
379
- case type
380
- when :double
381
- through do |k,v|
382
- new[k] = v.first.first
383
- end
384
- when :flat
385
- through do |k,v|
386
- new[k] = v.first
387
- end
388
- when :single
389
- return self
390
- when :list
391
- through do |k,v|
392
- new[k] = v.nil? ? nil : v.first
393
- end
394
- end
395
- end
396
-
397
- self.annotate(new)
398
- new.type = :single
399
- new.fields = [new.fields.first] if new.fields.length > 1
400
- new
401
- end
402
-
403
-
404
- def to_onehot(boolean = false)
405
- all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
406
- index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
407
- index.cast = :to_i unless boolean
408
- through do |key,values|
409
- v = all_values.collect{|_v| values.include?(_v)}
410
- v = v.collect{|_v| _v ? 1 : 0 } unless boolean
411
- index[key] = v
412
- end
413
- index
414
- end
415
-
416
- def merge(other)
417
- self.annotate(super(other))
418
- end
419
- end
420
-
1
+ #require 'rbbt/resource/path'
2
+ #module TSV
3
+ #
4
+ # def self.stream_column(file, column)
5
+ # header = TSV.parse_header(file)
6
+ # pos = header.fields.index(column) + 1
7
+ # sep2 = header.options[:sep2] || "|"
8
+ # case header.type.to_s
9
+ # when nil, "double"
10
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
11
+ # next if line =~ /^#/
12
+ # line.split("\t")[pos].gsub(sep2, "\n")
13
+ # end
14
+ # when "single"
15
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
16
+ # next if line =~ /^#/
17
+ # line.split("\t")[1]
18
+ # end
19
+ # when "flat"
20
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
21
+ # next if line =~ /^#/
22
+ # line.split("\t")[1..-1] * "\n"
23
+ # end
24
+ # when 'list'
25
+ # TSV.traverse file, :type => :array, :into => :stream do |line|
26
+ # next if line =~ /^#/
27
+ # line.split("\t")[pos]
28
+ # end
29
+ # end
30
+ # end
31
+ #
32
+ # def self.guess_id(identifier_file, values, options = {})
33
+ # field_matches = TSV.field_match_counts(identifier_file, values, options)
34
+ # field_matches.sort_by{|field, count| count.to_i}.last
35
+ # end
36
+ #
37
+ # def self.field_match_counts(file, values, options = {})
38
+ # options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
39
+ # persist_options = IndiferentHash.pull_keys options, :persist
40
+ #
41
+ # filename = TSV === file ? file.filename : file
42
+ # path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
43
+ # tsv = TSV === file ? file : TSV.open(file, options)
44
+ #
45
+ # text = ""
46
+ # fields = nil
47
+ # tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
48
+ # names.zip(fields).each do |list, format|
49
+ # list = [list] unless Array === list
50
+ # list.delete_if do |name| name.empty? end
51
+ # next if list.empty?
52
+ # text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
53
+ # end
54
+ # text << [gene, tsv.key_field] * "\t" << "\n"
55
+ # end
56
+ # text
57
+ # end
58
+ #
59
+ # TmpFile.with_file(values.uniq * "\n", false) do |value_file|
60
+ # cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
61
+ # begin
62
+ # TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
63
+ # rescue
64
+ # Log.exception $!
65
+ # TSV.setup({}, :type => :single, :cast => :to_i)
66
+ # end
67
+ # end
68
+ # end
69
+ #
70
+ # def self.get_filename(file)
71
+ # case
72
+ # when (defined? Step and Step === file)
73
+ # file.path
74
+ # when Path === file
75
+ # file
76
+ # when (String === file and (Open.exists? file or Open.remote? file))
77
+ # file
78
+ # when String === file
79
+ # "String-#{Misc.digest file}"
80
+ # when file.respond_to?(:filename)
81
+ # file.filename
82
+ # when file.respond_to?(:gets)
83
+ # nil
84
+ # else
85
+ # raise "Cannot get filename from: #{file.inspect}"
86
+ # end
87
+ # end
88
+ #
89
+ # def self.abort_stream(file, exception = nil)
90
+ # return if file.nil?
91
+ # if defined? Step and Step === file
92
+ # if exception
93
+ # file.exception exception
94
+ # else
95
+ # if not (file.aborted? or file.done?)
96
+ # file.abort
97
+ # end
98
+ # end
99
+ # elsif Hash === file or Array === file
100
+ # return
101
+ # else
102
+ # stream = get_stream(file)
103
+ # stream.abort(exception) if stream.respond_to? :abort
104
+ # AbortedStream.setup(stream, exception) unless stream.respond_to?(:exception) && stream.exception
105
+ # end
106
+ # end
107
+ #
108
+ # def self.get_stream(file, open_options = {})
109
+ # case file
110
+ # when Zlib::GzipReader
111
+ # file
112
+ # when (defined? Bgzf and Bgzf)
113
+ # file
114
+ # when TSV
115
+ # file.dumper_stream
116
+ # when TSV::Dumper
117
+ # file.stream
118
+ # when TSV::Parser
119
+ # file.stream
120
+ # when Path
121
+ # file.open(open_options)
122
+ # when (defined? Tempfile and Tempfile)
123
+ # begin
124
+ # pos = file.pos
125
+ # file.rewind if file.respond_to?(:rewind) and pos != 0
126
+ # rescue Exception
127
+ # end
128
+ # file
129
+ # when IO, StringIO, File
130
+ # begin
131
+ # pos = file.pos
132
+ # file.rewind if file.respond_to?(:rewind) and pos != 0
133
+ # rescue
134
+ # end
135
+ # file
136
+ # when String
137
+ # if Open.remote?(file) || Open.ssh?(file) || Open.exist?(file)
138
+ # Open.open(file, open_options)
139
+ # else
140
+ # StringIO.new file
141
+ # end
142
+ # when (defined? Step and Step)
143
+ # if file.respond_to?(:base_url)
144
+ # if file.result and IO === file.result
145
+ # file.result
146
+ # else
147
+ # file.join
148
+ # get_stream(file.path, open_options.merge(:nocache => true))
149
+ # end
150
+ # else
151
+ # file.grace
152
+ #
153
+ # stream = file.get_stream
154
+ # if stream && ! stream.closed?
155
+ # stream
156
+ # else
157
+ # file.join
158
+ # raise "Aborted stream from Step #{file.path}" if file.aborted?
159
+ # raise "Exception in stream from Step #{file.path}: #{file.messages.last}" if file.error?
160
+ # get_stream(file.path, open_options)
161
+ # end
162
+ # end
163
+ # when Array
164
+ # Misc.open_pipe do |sin|
165
+ # file.each do |l|
166
+ # sin.puts l
167
+ # end
168
+ # end
169
+ # when Set
170
+ # get_stream(file.to_a, open_options)
171
+ # when Enumerable
172
+ # file
173
+ # else
174
+ # raise "Cannot get stream from: #{file.inspect}"
175
+ # end
176
+ # end
177
+ #
178
+ # def self.identify_field(key_field, fields, field)
179
+ # case field
180
+ # when nil
181
+ # :key
182
+ # when Symbol
183
+ # field == :key ? field : identify_field(key_field, fields, field.to_s)
184
+ # when Integer
185
+ # field
186
+ # when (fields.nil? and String)
187
+ # raise "No field information available and specified field not numeric: #{ field }" unless field =~ /^\d+$/
188
+ # identify_field(key_field, fields, field.to_i)
189
+ # when String
190
+ # return :key if key_field == field
191
+ # pos = fields.index field
192
+ # return pos if pos
193
+ # return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
194
+ # if fields.select{|f| f.include?("(") }.any?
195
+ # simplify_fields = fields.collect do |f|
196
+ # if m = f.match(/(.*)\s+\(.*\)/)
197
+ # m[1]
198
+ # else
199
+ # f
200
+ # end
201
+ # end
202
+ # return identify_field(key_field, simplify_fields, field)
203
+ # end
204
+ # raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
205
+ # else
206
+ # raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
207
+ # end
208
+ # end
209
+ #
210
+ #
211
+ #
212
+ # def self.header_lines(key_field, fields, entry_hash = nil)
213
+ # if Hash === entry_hash
214
+ # sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
215
+ # preamble = entry_hash[:preamble]
216
+ # header_hash = entry_hash[:header_hash]
217
+ # end
218
+ #
219
+ # header_hash = "#" if header_hash.nil?
220
+ #
221
+ # preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
222
+ #
223
+ # str = ""
224
+ # str << preamble.strip << "\n" if preamble and not preamble.empty?
225
+ # if fields
226
+ # if fields.empty?
227
+ # str << header_hash << (key_field || "ID").to_s << "\n"
228
+ # else
229
+ # str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
230
+ # end
231
+ # end
232
+ #
233
+ # str
234
+ # end
235
+ #
236
+ # def identify_field(field)
237
+ # TSV.identify_field(key_field, fields, field)
238
+ # end
239
+ #
240
+ # def rename_field(field, new)
241
+ # self.fields = self.fields.collect{|f| f == field ? new : f }
242
+ # self
243
+ # end
244
+ #
245
+ # def unzip_replicates
246
+ # raise "Can only unzip replicates in :double TSVs" unless type == :double
247
+ #
248
+ # new = {}
249
+ # self.with_unnamed do
250
+ # through do |k,vs|
251
+ # Misc.zip_fields(vs).each_with_index do |v,i|
252
+ # new[k + "(#{i})"] = v
253
+ # end
254
+ # end
255
+ # end
256
+ #
257
+ # self.annotate(new)
258
+ # new.type = :list
259
+ #
260
+ # new
261
+ # end
262
+ #
263
+ # def to_list(&block)
264
+ # new = {}
265
+ # case type
266
+ # when :double
267
+ # if block_given?
268
+ # through do |k,v|
269
+ # if block.arity == 1
270
+ # new[k] = v.collect{|e| yield e}
271
+ # else
272
+ # new[k] = yield k, v
273
+ # end
274
+ # end
275
+ # else
276
+ # through do |k,v|
277
+ # new[k] = v.collect{|e| e.first}
278
+ # end
279
+ # end
280
+ # when :flat
281
+ # through do |k,v|
282
+ # new[k] = [v.first]
283
+ # end
284
+ # when :single
285
+ # through do |k,v|
286
+ # new[k] = [v]
287
+ # end
288
+ # when :list
289
+ # return self
290
+ # end
291
+ # self.annotate(new)
292
+ # new.type = :list
293
+ # new
294
+ # end
295
+ #
296
+ # def to_double
297
+ # new = {}
298
+ # case type
299
+ # when :double
300
+ # return self
301
+ # when :flat
302
+ # through do |k,v|
303
+ # new[k] = v.nil? ? [] : [v]
304
+ # end
305
+ # when :single
306
+ # through do |k,v|
307
+ # new[k] = v.nil? ? [[]] : [[v]]
308
+ # end
309
+ # when :list
310
+ # if block_given?
311
+ # through do |k,v|
312
+ # if v.nil?
313
+ # new[k] = nil
314
+ # else
315
+ # new[k] = v.collect{|e| yield e}
316
+ # end
317
+ # end
318
+ # else
319
+ # through do |k,v|
320
+ # if v.nil?
321
+ # new[k] = nil
322
+ # else
323
+ # new[k] = v.collect{|e| [e]}
324
+ # end
325
+ # end
326
+ # end
327
+ # end
328
+ # self.annotate(new)
329
+ # new.type = :double
330
+ # new
331
+ # end
332
+ #
333
+ # def to_flat(field = nil)
334
+ # new = {}
335
+ # case type
336
+ # when :double
337
+ # if field.nil?
338
+ # through do |k,v| new[k] = v.first end
339
+ # elsif field == :all
340
+ # through do |k,v| new[k] = v.flatten.compact end
341
+ # else
342
+ # pos = identify_field field
343
+ # through do |k,v| new[k] = v[pos] end
344
+ # end
345
+ # when :flat
346
+ # return self
347
+ # when :single
348
+ # through do |k,v|
349
+ # new[k] = [v]
350
+ # end
351
+ # when :list
352
+ # through do |k,v|
353
+ # new[k] = [v.first]
354
+ # end
355
+ # end
356
+ # self.annotate(new)
357
+ # if new.fields
358
+ # case field
359
+ # when nil
360
+ # new.fields = new.fields[0..0]
361
+ # when :all
362
+ # new.fields = [new.fields * "+"]
363
+ # else
364
+ # new.fields = [field]
365
+ # end
366
+ # end
367
+ # new.type = :flat
368
+ # new
369
+ # end
370
+ #
371
+ # def to_single
372
+ # new = {}
373
+ #
374
+ # if block_given?
375
+ # through do |k,v|
376
+ # new[k] = yield v
377
+ # end
378
+ # else
379
+ # case type
380
+ # when :double
381
+ # through do |k,v|
382
+ # new[k] = v.first.first
383
+ # end
384
+ # when :flat
385
+ # through do |k,v|
386
+ # new[k] = v.first
387
+ # end
388
+ # when :single
389
+ # return self
390
+ # when :list
391
+ # through do |k,v|
392
+ # new[k] = v.nil? ? nil : v.first
393
+ # end
394
+ # end
395
+ # end
396
+ #
397
+ # self.annotate(new)
398
+ # new.type = :single
399
+ # new.fields = [new.fields.first] if new.fields.length > 1
400
+ # new
401
+ # end
402
+ #
403
+ #
404
+ # def to_onehot(boolean = false)
405
+ # all_values = values.flatten.uniq.collect{|v| v.to_s}.sort
406
+ # index = TSV.setup({}, :key_field => key_field, :fields => all_values, :type => :list)
407
+ # index.cast = :to_i unless boolean
408
+ # through do |key,values|
409
+ # v = all_values.collect{|_v| values.include?(_v)}
410
+ # v = v.collect{|_v| _v ? 1 : 0 } unless boolean
411
+ # index[key] = v
412
+ # end
413
+ # index
414
+ # end
415
+ #
416
+ # def merge(other)
417
+ # self.annotate(super(other))
418
+ # end
419
+ #end
420
+ #