rbbt-util 5.6.0 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b772991027bae39d5d75e7d19fdfc628b2a78871
4
- data.tar.gz: aed0bd56fbd0178bff6cd37b386390a392a5a7ec
3
+ metadata.gz: 3656ab04698a077becda559ac837570042688fa8
4
+ data.tar.gz: f54aa65f719933df04767b54e896c74adb805e34
5
5
  SHA512:
6
- metadata.gz: 6aeb956836ee4d77b643249dc63c13ba6aed3aa9568b58173eb872c45e384d5202ea8e742c55e775187371f4e5b438198d57f56896e3daad9041158320692677
7
- data.tar.gz: eab7947a295e70759b50bf6333c589f7c71aa535d5c5c26f0c848aacb5b9f723505799bb4baec4c16f30b5fe3f9a5ed8715f3f6326612b33b342823622428497
6
+ metadata.gz: 929e4be6771df3db4f8507a9af317cb9bd841107d4170ec2ff198f1e1ed2aca0762e5b32851b1653336afdae49dc2729b0143fd65938cedd11660db2a5d687fc
7
+ data.tar.gz: 63ce2e0a7e12c9e6c6204a5d79764e9691ff781ae68e27586b135c524710ce557ce89a958aff4b7c101aa3cfd67897fdcb0ec415b5b09f315dad8a27ce437199
@@ -484,10 +484,8 @@ module TSV
484
484
 
485
485
  str = ""
486
486
 
487
- str << "#: " << Misc.hash2string((ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
488
- if fields
489
- str << "#" << key_field << "\t" << fields * "\t" << "\n"
490
- end
487
+ entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
488
+ str = TSV.header_lines(key_field, fields, entry_hash)
491
489
 
492
490
  with_unnamed do
493
491
  if keys.nil?
@@ -3,7 +3,11 @@ require 'rbbt/tsv/attach/util'
3
3
  module TSV
4
4
 
5
5
  # Merge columns from different rows of a file
6
- def self.merge_row_fields(input, output, sep = "\t")
6
+ def self.merge_row_fields(input, output, options = {})
7
+ options = Misc.add_defaults options, :sep => "\t"
8
+ key_field, fields = Misc.process_options options, :key_field, :fields
9
+ sep = options[:sep]
10
+
7
11
  is = case
8
12
  when (String === input and not input.index("\n") and input.length < 250 and File.exists?(input))
9
13
  CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
@@ -12,16 +16,25 @@ module TSV
12
16
  else
13
17
  input
14
18
  end
19
+
20
+ if key_field.nil? or fields.nil?
21
+ parser = TSV::Parser.new(is, options.dup)
22
+ fields ||= parser.fields
23
+ key_field ||= parser.key_field
24
+ line = parser.first_line
25
+ else
26
+ line = is.gets
27
+ end
15
28
 
16
29
  current_key = nil
17
30
  current_parts = []
18
31
 
19
32
  done = false
20
33
  Open.write(output) do |os|
34
+ os.puts TSV.header_lines(key_field, fields, options)
21
35
 
22
- done = is.eof?
23
- while not done
24
- key, *parts = is.gets.sub("\n",'').split(sep, -1)
36
+ while line
37
+ key, *parts = line.sub("\n",'').split(sep, -1)
25
38
  current_key ||= key
26
39
  case
27
40
  when key.nil?
@@ -39,7 +52,7 @@ module TSV
39
52
  current_parts = parts
40
53
  end
41
54
 
42
- done = is.eof?
55
+ line = is.gets
43
56
  end
44
57
 
45
58
  os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
@@ -48,10 +61,14 @@ module TSV
48
61
  end
49
62
 
50
63
  # Merge two files with the same keys and different fields
51
- def self.merge_different_fields(file1, file2, output, sep = "\t", monitor = false)
64
+ def self.merge_different_fields(file1, file2, output, options = {})
65
+ options = Misc.add_defaults options, :sep => "\t"
66
+ monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
67
+ sep = options[:sep] || "\t"
68
+
52
69
  case
53
70
  when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
54
- size = CMD.cmd("wc -l '#{file1}'").read.to_f if monitor
71
+ size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
55
72
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
56
73
  when (String === file1 or StringIO === file1)
57
74
  size = file1.length if monitor
@@ -80,19 +97,27 @@ module TSV
80
97
 
81
98
  key1 = key2 = nil
82
99
  while key1.nil?
83
- while (line1 = file1.gets) =~ /#/; end
100
+ while (line1 = file1.gets) =~ /^#/
101
+ key_field1, *fields1 = line1.strip.sub('#','').split(sep)
102
+ end
84
103
  key1, *parts1 = line1.sub("\n",'').split(sep, -1)
85
104
  cols1 = parts1.length
86
105
  end
87
106
 
88
107
  while key2.nil?
89
- while (line2 = file2.gets) =~ /#/; end
108
+ while (line2 = file2.gets) =~ /^#/
109
+ key_field2, *fields2 = line2.strip.sub('#','').split(sep)
110
+ end
90
111
  key2, *parts2 = line2.sub("\n",'').split(sep, -1)
91
112
  cols2 = parts2.length
92
113
  end
93
114
 
94
115
  progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
95
116
 
117
+ entry_hash = options
118
+ entry_hash.delete :sep if entry_hash[:sep] == "\t"
119
+ output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
120
+
96
121
  key = key1 < key2 ? key1 : key2
97
122
  parts = [""] * (cols1 + cols2)
98
123
  while not (done1 and done2)
@@ -189,7 +214,7 @@ module TSV
189
214
 
190
215
  def merge_different_fields(other, options = {})
191
216
  TmpFile.with_file do |output|
192
- TSV.merge_different_fields(self, other, output, options[:sep] || "\t")
217
+ TSV.merge_different_fields(self, other, output, options)
193
218
  tsv = TSV.open output, options
194
219
  tsv.key_field = self.key_field unless self.key_field.nil?
195
220
  tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
@@ -34,7 +34,7 @@ module TSV
34
34
 
35
35
  # Process fields line
36
36
 
37
- if line and Misc.fixutf8(line) =~ /^#{@header_hash}/
37
+ while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
38
38
  line.chomp!
39
39
  @fields = line.split(@sep)
40
40
  @key_field = @fields.shift
data/lib/rbbt/tsv/util.rb CHANGED
@@ -1,6 +1,40 @@
1
1
  require 'rbbt/resource/path'
2
2
  module TSV
3
3
 
4
+ def self.reorder_stream(stream, positions, sep = "\t")
5
+ Misc.open_pipe do |sin|
6
+ line = stream.gets
7
+ while line =~ /^#\:/
8
+ sin.puts line
9
+ line = stream.gets
10
+ end
11
+ while line =~ /^#/
12
+ if Hash === positions
13
+ new = (0..line.split(sep).length-1).to_a
14
+ positions.each do |k,v|
15
+ new[k] = v
16
+ new[v] = k
17
+ end
18
+ positions = new
19
+ end
20
+ sin.puts "#" + line.sub!(/^#/,'').strip.split(sep).values_at(*positions).compact * sep
21
+ line = stream.gets
22
+ end
23
+ while line
24
+ if Hash === positions
25
+ new = (0..line.split(sep).length-1).to_a
26
+ positions.each do |k,v|
27
+ new[k] = v
28
+ new[v] = k
29
+ end
30
+ positions = new
31
+ end
32
+ sin.puts line.strip.split(sep).values_at(*positions) * sep
33
+ line = stream.gets
34
+ end
35
+ end
36
+ end
37
+
4
38
  def self.field_match_counts(file, values, options = {})
5
39
  options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
6
40
  persist_options = Misc.pull_keys options, :persist
@@ -52,6 +86,7 @@ module TSV
52
86
  when String === file
53
87
  Open.open(file, open_options)
54
88
  when file.respond_to?(:gets)
89
+ file.rewind if file.respond_to?(:rewind) and file.eof?
55
90
  file
56
91
  else
57
92
  raise "Cannot get stream from: #{file.inspect}"
@@ -76,6 +111,18 @@ module TSV
76
111
  Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
77
112
  end
78
113
  end
114
+
115
+ def self.header_lines(key_field, fields, entry_hash = {})
116
+ sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
117
+
118
+ str = ""
119
+ str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
120
+ if fields
121
+ str << "#" << key_field << sep << fields * sep << "\n"
122
+ end
123
+
124
+ str
125
+ end
79
126
 
80
127
  def identify_field(field)
81
128
  TSV.identify_field(key_field, fields, field)
@@ -55,4 +55,36 @@ module FileCache
55
55
 
56
56
  FileUtils.rm path if File.exist? path
57
57
  end
58
+
59
+ def self.cache_online_elements(ids, pattern = nil, &block)
60
+ ids = [ids] unless Array === ids
61
+
62
+ result_files = {}
63
+ missing = []
64
+ ids.each do |id|
65
+ filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
66
+
67
+ if FileCache.found(filename)
68
+ result_files[id] = FileCache.path(filename)
69
+ else
70
+ missing << id
71
+ end
72
+ end
73
+
74
+ yield(missing).each do |id, content|
75
+ filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
76
+ path = FileCache.path(filename)
77
+ Open.write(path, content)
78
+ result_files[id] = content
79
+ end
80
+
81
+ missing.each do |id|
82
+ filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
83
+ result = yield id
84
+ File.open{|f| f.write(path = FileCache.path(filename)) }
85
+ result_files[id] = path
86
+ end
87
+
88
+ result_files
89
+ end
58
90
  end
data/lib/rbbt/util/log.rb CHANGED
@@ -76,6 +76,10 @@ module Log
76
76
  log(message, ERROR, &block)
77
77
  end
78
78
 
79
+ def self.exception(e)
80
+ error(e.message)
81
+ error("BACKTRACE:\n" + e.backtrace * "\n")
82
+ end
79
83
 
80
84
  case ENV['RBBT_LOG']
81
85
  when 'DEBUG'
@@ -1318,6 +1318,7 @@ end
1318
1318
  # Divides the array into +num+ chunks of the same size by placing one
1319
1319
  # element in each chunk iteratively.
1320
1320
  def self.divide(array, num)
1321
+ num = 1 if num == 0
1321
1322
  chunks = []
1322
1323
  num.to_i.times do chunks << [] end
1323
1324
  array.each_with_index{|e, i|
@@ -1341,6 +1342,23 @@ end
1341
1342
  chunks
1342
1343
  end
1343
1344
 
1345
+ def self.open_pipe
1346
+ sout, sin = IO.pipe
1347
+ raise "No block given" unless block_given?
1348
+ Thread.new{
1349
+ begin
1350
+ yield sin
1351
+ rescue
1352
+ Log.exception $!
1353
+ raise $!
1354
+ ensure
1355
+ sin.close
1356
+ end
1357
+ }
1358
+ sout
1359
+ end
1360
+
1361
+
1344
1362
  def self.zip_fields(array)
1345
1363
  return [] if array.empty?
1346
1364
  array[0].zip(*array[1..-1])
@@ -28,11 +28,21 @@ module SOPT
28
28
  @description ||= "Missing"
29
29
  end
30
30
 
31
-
32
31
  def self.shortcuts
33
32
  @shortcuts ||= []
34
33
  end
35
34
 
35
+ def self.delete_inputs(inputs)
36
+ inputs.each do |input|
37
+ input = input.to_s
38
+ self.shortcuts.delete self.input_shortcuts.delete(input)
39
+ self.inputs.delete input
40
+ self.input_types.delete input
41
+ self.input_defaults.delete input
42
+ self.input_descriptions.delete input
43
+ end
44
+ end
45
+
36
46
  def self.all
37
47
  @all ||= {}
38
48
  end
data/lib/rbbt/workflow.rb CHANGED
@@ -28,7 +28,7 @@ module Workflow
28
28
  def self.load_workflow_file(filename)
29
29
  begin
30
30
  $LOAD_PATH.unshift(File.join(File.dirname(File.expand_path(filename)), 'lib'))
31
- require filename
31
+ require File.expand_path(filename)
32
32
  Log.debug{"Workflow loaded from: #{ filename }"}
33
33
  return true
34
34
  rescue Exception
@@ -99,6 +99,12 @@ module Workflow
99
99
 
100
100
  # Load locally
101
101
 
102
+ if wf_name =~ /::\w+$/
103
+ clean_name = wf_name.sub(/::.*/,'')
104
+ Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
105
+ wf_name = clean_name
106
+ end
107
+
102
108
  Log.info{"Loading workflow #{wf_name}"}
103
109
  require_local_workflow(wf_name) or
104
110
  require_local_workflow(Misc.snake_case(wf_name)) or
@@ -184,7 +184,18 @@ class Step
184
184
  end
185
185
  end
186
186
 
187
-
187
+ def provenance
188
+ provenance = {}
189
+ dependencies.each do |dep|
190
+ next unless dep.path.exists?
191
+ if File.exists? dep.info_file
192
+ provenance[dep.path] = dep.provenance if File.exists? dep.path
193
+ else
194
+ provenance[dep.path] = nil
195
+ end
196
+ end
197
+ {:inputs => info[:inputs], :provenance => provenance}
198
+ end
188
199
  end
189
200
 
190
201
  module Workflow
@@ -264,16 +275,15 @@ module Workflow
264
275
  [taskname].concat(rec_dependencies(taskname)).inject({}){|acc, tn| acc.merge tasks[tn.to_sym].input_options}
265
276
  end
266
277
 
267
-
268
278
  def real_dependencies(task, jobname, inputs, dependencies)
269
279
  real_dependencies = []
270
280
  dependencies.each do |dependency|
271
- real_dependencies << case
272
- when Step === dependency
281
+ real_dependencies << case dependency
282
+ when Step
273
283
  dependency
274
- when Symbol === dependency
284
+ when Symbol
275
285
  job(dependency, jobname, inputs)
276
- when Proc === dependency
286
+ when Proc
277
287
  dependency.call jobname, inputs
278
288
  end
279
289
  end
@@ -303,7 +313,6 @@ module Workflow
303
313
  }
304
314
  end
305
315
 
306
-
307
316
  def id_for(path)
308
317
  if workdir.respond_to? :find
309
318
  workdir_find = workdir.find
@@ -322,4 +331,5 @@ module Workflow
322
331
 
323
332
  Misc.path_relative_to(workdir_find, File.dirname(path)).sub(/([^\/]+)\/.*/,'\1')
324
333
  end
334
+
325
335
  end
@@ -281,7 +281,11 @@ class Step
281
281
  end
282
282
 
283
283
  def recursive_clean
284
- rec_dependencies.each{|step| step.clean }
284
+ rec_dependencies.each do |step|
285
+ if File.exists?(step.info_file)
286
+ step.clean
287
+ end
288
+ end
285
289
  clean
286
290
  end
287
291
 
@@ -6,7 +6,7 @@ require 'rbbt/workflow/usage'
6
6
 
7
7
  YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
8
8
 
9
- def usage(workflow = nil, task = nil, error = nil)
9
+ def usage(workflow = nil, task = nil, error = nil, backtrace = nil)
10
10
  puts SOPT.doc
11
11
  puts "## WORKFLOW"
12
12
  puts
@@ -27,7 +27,8 @@ def usage(workflow = nil, task = nil, error = nil)
27
27
  workflow.doc(task)
28
28
  if error
29
29
  puts
30
- puts "Error: " << error
30
+ puts Term::ANSIColor.red("Error: ") << error
31
+ puts Term::ANSIColor.red("Backtrace: ") << backtrace * "\n"
31
32
  end
32
33
  end
33
34
 
@@ -112,8 +113,10 @@ options = SOPT.get <<EOF
112
113
  -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
113
114
  -jn--jobname* Job name to use. The name 'Default' is used by default:
114
115
  -pn--printname Print the name of the job and exit without starting it:
116
+ -jf--job_file* Output one of the job produced files:
115
117
  -wd--workdir* Change the working directory of the workflow:
116
118
  -O--output* Save job result into file:
119
+ --provenance Report the jobs provenance:
117
120
  --fork Run job asyncronously:
118
121
  EOF
119
122
 
@@ -178,13 +181,13 @@ workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir))) if opt
178
181
  job = workflow.job(task.name, name, job_options)
179
182
 
180
183
  # clean job
181
- if clean and job.done? != false
184
+ if clean
182
185
  job.clean
183
186
  sleep 1
184
187
  job = workflow.job(task.name, name, job_options)
185
188
  end
186
189
 
187
- if recursive_clean and job.done?
190
+ if recursive_clean
188
191
  job.recursive_clean
189
192
  sleep 1
190
193
  job = workflow.job(task.name, name, job_options)
@@ -192,46 +195,59 @@ end
192
195
 
193
196
  # run
194
197
  begin
195
- if do_exec
196
- res = job.exec
197
- case
198
- when Array === res
199
- puts res * "\n"
200
- when TSV === res
201
- puts res
202
- when Hash === res
203
- puts res.to_yaml
198
+ if do_exec
199
+ res = job.exec
200
+ case
201
+ when Array === res
202
+ puts res * "\n"
203
+ when TSV === res
204
+ puts res
205
+ when Hash === res
206
+ puts res.to_yaml
207
+ else
208
+ puts res
209
+ end
210
+ exit 0
211
+ end
212
+
213
+ if do_fork
214
+ job.fork
215
+ while not job.done?
216
+ message = job.messages ? job.messages.last : "no message"
217
+ Log.debug "#{job.status}: #{message}"
218
+ sleep 2
219
+ end
220
+ raise job.messages.last if job.error?
221
+ res = job.load
204
222
  else
205
- puts res
223
+ res = job.run(true)
206
224
  end
207
- exit 0
208
- end
209
225
 
210
- if do_fork
211
- job.fork
212
- while not job.done?
213
- message = job.messages ? job.messages.last : "no message"
214
- Log.debug "#{job.status}: #{message}"
215
- sleep 2
226
+
227
+ if options.delete(:provenance)
228
+ require 'pp'
229
+ pp job.provenance
230
+ exit 0
216
231
  end
217
- raise job.messages.last if job.error?
218
- res = job.load
219
- else
220
- res = job.run(true)
221
- end
222
232
 
223
- if options.delete(:printname)
224
- puts job.name
225
- exit 0
226
- else
227
- Log.low "Job name: #{job.name}"
228
- end
233
+ if options.delete(:printname)
234
+ puts job.name
235
+ exit 0
236
+ else
237
+ Log.low "Job name: #{job.name}"
238
+ end
229
239
  rescue ParameterException
230
- usage(workflow, task, $!.message)
240
+ SOPT.delete_inputs(workflow.rec_inputs(task.name))
241
+ usage(workflow, task, $!.message, $!.backtrace)
231
242
  end
232
243
 
233
244
  out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
234
245
 
246
+ if job_file = options.delete(:job_file)
247
+ out.puts res.file(job_file).read
248
+ exit 0
249
+ end
250
+
235
251
  if Step === res
236
252
  out.puts Open.read(res.path) if File.exists? res.path
237
253
  else
@@ -29,7 +29,7 @@ class TestAssociationIndex < Test::Unit::TestCase
29
29
  genes = tp53.pina_interactors.ensembl
30
30
  genes << tp53
31
31
 
32
- Misc.benchmark(10) do
32
+ Misc.benchmark(100) do
33
33
  index.subset_entities("Ensembl Gene ID" => genes).length
34
34
  end
35
35
 
@@ -76,8 +76,8 @@ g 25
76
76
 
77
77
  def test_range
78
78
  data =<<-EOF
79
+ ##012345678901234567890
79
80
  #ID:Range
80
- #:012345678901234567890
81
81
  a: ______
82
82
  b: ______
83
83
  c: _______
@@ -55,53 +55,12 @@ class TestKnowledgeBase < Test::Unit::TestCase
55
55
  assert @kb.all_databases.include? "pina"
56
56
  end
57
57
 
58
- def test_items
59
- tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
60
- kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
61
- kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
62
- end
63
-
64
- def __test_subset
58
+ def test_subset
65
59
  gene = "TP53"
66
60
  found = Genomics.knowledge_base.identify :pina, gene
67
- p53_interactors = Misc.profile{ Genomics.knowledge_base.children(:pina, found).target_entity }
61
+ p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
68
62
 
69
-
70
- Misc.profile do
71
- puts Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).length
72
- end
73
- ddd 2
74
- #assert Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).target_entities.name.include? "MDM2"
75
- end
76
-
77
- def test_benchmark
78
- tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
79
- kb = KnowledgeBase.new Rbbt.tmp.test.kb2
80
- kb.namespace = "Hsa/jan2013"
81
-
82
- require 'rbbt/sources/COSMIC'
83
- require 'rbbt/entity/genomic_mutation'
84
- mutations = tp53.COSMIC_mutations
85
- Misc.benchmark(10) do
86
- name = "mutations"
87
- kb.add_index name, "Ensembl Gene ID", "Genomic Mutation", "Change"
88
- kb.write name do
89
- mutations.each do |gm|
90
- kb.add name, tp53, gm, gm.base
91
- end
92
- end
93
- end
94
- end
95
-
96
- def test_benchmark2
97
- tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
98
- kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
99
- kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
100
- l = nil
101
- Misc.benchmark(1000) do
102
- l = tp53.transcripts.length
103
- end
104
- assert l > 0
63
+ assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
105
64
  end
106
65
 
107
66
  def test_syndication
@@ -57,6 +57,7 @@ row2 A B Id3
57
57
 
58
58
  TmpFile.with_file(content) do |filename|
59
59
  tsv = TSV.open(filename, :sep => /\s+/)
60
+ puts tsv.to_s
60
61
  assert tsv.to_s =~ /row1\ta|aa|aaa/
61
62
  assert tsv.to_s =~ /:type=:double/
62
63
  end
@@ -87,7 +87,6 @@ B Id3
87
87
 
88
88
  tsv1.attach_source_key tsv2, "ValueB"
89
89
 
90
-
91
90
  assert_equal %w(ValueA ValueB OtherID), tsv1.fields
92
91
  assert_equal "Id1", tsv1["row1"]["OtherID"]
93
92
  end
@@ -290,7 +289,7 @@ row6 dd dd ee
290
289
  EOF
291
290
 
292
291
  TmpFile.with_file do |f|
293
- TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, " ")
292
+ TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, :sep => " ")
294
293
  assert_equal result, Open.read(f)
295
294
  end
296
295
  end
@@ -327,6 +326,7 @@ row6 dd dd ee
327
326
 
328
327
  def test_merge_different_rows_split_lines
329
328
  file1 =<<-EOF
329
+ #ID,letterA,letterB,letterC
330
330
  row6,dd,dd,ee
331
331
  row1,a,b,c
332
332
  row1,aa,bb,cc
@@ -334,6 +334,7 @@ row2,A,B,C
334
334
  row3,1,2,3
335
335
  EOF
336
336
  file2 =<<-EOF
337
+ #ID,letterD,letterE
337
338
  row20,rr,rr
338
339
  row1,d,e
339
340
  row2,D,E
@@ -342,6 +343,8 @@ row4,x,y
342
343
 
343
344
  # Might be slightly different ...
344
345
  result1 =<<-EOF
346
+ #: :sep=,
347
+ #ID,letterA,letterB,letterC,letterD,letterE
345
348
  row1,aa|a,bb|b,cc|c,d,e
346
349
  row2,A,B,C,D,E
347
350
  row20,,,,rr,rr
@@ -350,6 +353,8 @@ row4,,,,x,y
350
353
  row6,dd,dd,ee,,
351
354
  EOF
352
355
  result2 =<<-EOF
356
+ #: :sep=,
357
+ #ID,letterA,letterB,letterC,letterD,letterE
353
358
  row1,a|aa,b|bb,c|cc,d,e
354
359
  row2,A,B,C,D,E
355
360
  row20,,,,rr,rr
@@ -359,8 +364,7 @@ row6,dd,dd,ee,,
359
364
  EOF
360
365
 
361
366
  TmpFile.with_file do |f|
362
- TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, ','
363
-
367
+ TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, :sep => ','
364
368
  # ... so check for either
365
369
  assert(Open.read(f) == result1 || Open.read(f) == result2)
366
370
  end
@@ -403,6 +407,7 @@ row6,dd,dd,ee,,
403
407
 
404
408
  def test_merge_rows
405
409
  file1 =<<-EOF
410
+ #ID,letterA,letterB,letterC
406
411
  row1,a,b,c
407
412
  row1,aa,bb,cc
408
413
  row2,A,B,C
@@ -410,8 +415,9 @@ row3,1,2,3
410
415
  EOF
411
416
  TmpFile.with_file(file1) do |input|
412
417
  TmpFile.with_file() do |output|
413
- TSV.merge_row_fields Open.open(input), output
414
- assert Open.read(output) =~ /a|aa/
418
+ TSV.merge_row_fields Open.open(input), output, :sep => ','
419
+ assert Open.read(output) =~ /^#ID,letterA,letterB,letterC$/
420
+ assert Open.read(output).index "a|aa"
415
421
  end
416
422
  end
417
423
 
@@ -6,11 +6,11 @@ require 'rbbt/entity'
6
6
 
7
7
  class TestMisc < Test::Unit::TestCase
8
8
 
9
- def _test_humanize
9
+ def test_humanize
10
10
  assert_equal "mutation_enrichment", Misc.humanize("MutationEnrichment")
11
11
  end
12
12
 
13
- def _test_fixutf8
13
+ def test_fixutf8
14
14
  string = "abc\xffdef"
15
15
  string = string.force_encoding("UTF-8") if string.respond_to? :force_encoding
16
16
  assert(! string.valid_encoding?) if string.respond_to? :valid_encoding?
@@ -19,37 +19,37 @@ class TestMisc < Test::Unit::TestCase
19
19
  assert( Misc.fixutf8(string).valid_encoding) if string.respond_to? :valid_encoding
20
20
  end
21
21
 
22
- def _test_colors_for
22
+ def test_colors_for
23
23
  colors, used = Misc.colors_for([1,2,2,1,2,1,2,2,3,3,2,3,2])
24
24
  assert_equal Misc::COLOR_LIST[1], used[2]
25
25
  end
26
26
 
27
- def _test_total_length
27
+ def test_total_length
28
28
  ranges = [(0..100), (50..150), (120..160)]
29
29
  ranges = [(0..100), (50..150), (120..160), (51..70)]
30
30
  assert_equal 161, Misc.total_length(ranges)
31
31
  end
32
32
 
33
- def _test_id_filename?
33
+ def test_id_filename?
34
34
  TmpFile.with_file("") do |file|
35
35
  assert Misc.is_filename?(file)
36
36
  assert ! Misc.is_filename?("TEST STRING")
37
37
  end
38
38
  end
39
39
 
40
- def _test_merge_sorted_arrays
40
+ def test_merge_sorted_arrays
41
41
  assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
42
42
  end
43
43
 
44
- def _test_intersect_sorted_arrays
44
+ def test_intersect_sorted_arrays
45
45
  assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
46
46
  end
47
47
 
48
- def _test_sorted_array_matches
48
+ def test_sorted_array_matches
49
49
  assert_equal [1,3], Misc.sorted_array_hits(%w(a b c d e), %w(b d))
50
50
  end
51
51
 
52
- def _test_binary_include?
52
+ def test_binary_include?
53
53
  a = %w(a b c d e).sort
54
54
  assert Misc.binary_include?(a, "a")
55
55
  assert(!Misc.binary_include?(a, "z"))
@@ -58,24 +58,24 @@ class TestMisc < Test::Unit::TestCase
58
58
  assert(Misc.binary_include?(a, "d"))
59
59
  end
60
60
 
61
- def _test_process_to_hash
61
+ def test_process_to_hash
62
62
  list = [1,2,3,4]
63
63
  assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
64
64
  end
65
65
 
66
- # def _test_pdf2text_example
66
+ # def test_pdf2text_example
67
67
  # assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
68
68
  # end
69
69
  #
70
- # def _test_pdf2text_EPAR
70
+ # def test_pdf2text_EPAR
71
71
  # assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
72
72
  # end
73
73
  #
74
- # def _test_pdf2text_wrong
74
+ # def test_pdf2text_wrong
75
75
  # assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
76
76
  # end
77
77
 
78
- def _test_string2hash
78
+ def test_string2hash
79
79
  assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
80
80
  assert_equal(true, Misc.string2hash(":true")[:true])
81
81
  assert_equal(true, Misc.string2hash("true")["true"])
@@ -93,17 +93,17 @@ class TestMisc < Test::Unit::TestCase
93
93
  end
94
94
  end
95
95
 
96
- def _test_named_array
96
+ def test_named_array
97
97
  a = NamedArray.setup([1,2,3,4], %w(a b c d))
98
98
  assert_equal(1, a['a'])
99
99
  end
100
100
 
101
- # def _test_path_relative_to
101
+ # def test_path_relative_to
102
102
  # assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
103
103
  # end
104
104
 
105
- # def _test_chunk
106
- # _test =<<-EOF
105
+ # def test_chunk
106
+ # test =<<-EOF
107
107
  #This is an example file. Entries are separated by Entry
108
108
  #-- Entry
109
109
  #1
@@ -118,7 +118,7 @@ class TestMisc < Test::Unit::TestCase
118
118
  # assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
119
119
  # end
120
120
 
121
- def _test_hash2string
121
+ def test_hash2string
122
122
  hash = {}
123
123
  assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
124
124
 
@@ -136,14 +136,14 @@ class TestMisc < Test::Unit::TestCase
136
136
 
137
137
  end
138
138
 
139
- def _test_merge
139
+ def test_merge
140
140
  a = [[1],[2]]
141
141
  a = NamedArray.setup a, %w(1 2)
142
142
  a.merge [3,4]
143
143
  assert_equal [1,3], a[0]
144
144
  end
145
145
 
146
- def _test_indiferent_hash
146
+ def test_indiferent_hash
147
147
  a = {:a => 1, "b" => 2}
148
148
  a.extend IndiferentHash
149
149
 
@@ -153,7 +153,7 @@ class TestMisc < Test::Unit::TestCase
153
153
  assert_equal 2, a[:b]
154
154
  end
155
155
 
156
- def _test_lockfile
156
+ def test_lockfile
157
157
 
158
158
  TmpFile.with_file do |tmpfile|
159
159
  pids = []
@@ -177,7 +177,7 @@ class TestMisc < Test::Unit::TestCase
177
177
  end
178
178
  end
179
179
 
180
- def _test_positions2hash
180
+ def test_positions2hash
181
181
  inputs = Misc.positional2hash([:one, :two, :three], 1, :two => 2, :four => 4)
182
182
  assert_equal 1, inputs[:one]
183
183
  assert_equal 2, inputs[:two]
@@ -185,7 +185,7 @@ class TestMisc < Test::Unit::TestCase
185
185
  assert_equal nil, inputs[:four]
186
186
  end
187
187
 
188
- def _test_mean
188
+ def test_mean
189
189
  assert_equal 2, Misc.mean([1,2,3])
190
190
  assert_equal 3, Misc.mean([1,2,3,4,5])
191
191
  end
@@ -194,32 +194,32 @@ class TestMisc < Test::Unit::TestCase
194
194
  assert_equal Math.sqrt(2), Misc.sd([1,3])
195
195
  end
196
196
 
197
- def _test_align_small
197
+ def test_align_small
198
198
  reference = "AABCDEBD"
199
199
  sequence = "ABCD"
200
200
  assert_equal '-ABCD---', Misc.fast_align(reference, sequence).last
201
201
  end
202
202
 
203
- def _test_align_real
203
+ def test_align_real
204
204
  reference = "SGNECNKAIDGNKDTFWHTFYGANGDPKPPPHTYTIDMKTTQNVNGLSMLPRQDGNQNGWIGRHEVYLSSDGTNW"
205
205
  sequence = "TYTIDMKTTQNVNGLSML"
206
206
  assert_equal "--------------------------------TYTIDMKTTQNVNGLSML-------------------------", Misc.fast_align(reference, sequence).last
207
207
  end
208
208
 
209
- def _test_divide
209
+ def test_divide
210
210
  assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
211
211
  end
212
212
 
213
- def _test_ordered_divide
213
+ def test_ordered_divide
214
214
  assert_equal 5, Misc.ordered_divide(%w(1 2 3 4 5 6 7 8 9),2).length
215
215
  end
216
216
 
217
- def _test_collapse_ranges
217
+ def test_collapse_ranges
218
218
  ranges = [(0..100), (50..150), (51..61),(200..250), (300..324),(320..350)]
219
219
  assert_equal [(0..150),(200..250), (300..350)], Misc.collapse_ranges(ranges)
220
220
  end
221
221
 
222
- def _test_humanize
222
+ def test_humanize
223
223
  str1 = "test_string"
224
224
  str2 = "TEST_string"
225
225
  str3 = "test"
@@ -229,18 +229,18 @@ class TestMisc < Test::Unit::TestCase
229
229
  assert_equal "Test", Misc.humanize(str3)
230
230
  end
231
231
 
232
- def _test_snake_case
232
+ def test_snake_case
233
233
  str1 = "ACRONIMTest"
234
234
  str2 = "ACRONIM_test"
235
235
  assert_equal "ACRONIM_test", Misc.snake_case(str1)
236
236
  assert_equal "ACRONIM_test", Misc.snake_case(str2)
237
237
  end
238
238
 
239
- def _test_correct_vcf_mutations
239
+ def test_correct_vcf_mutations
240
240
  assert_equal [737407, ["-----", "-----G", "-----GTTAAT"]], Misc.correct_vcf_mutation(737406, "GTTAAT", "G,GG,GGTTAAT")
241
241
  end
242
242
 
243
- def _test_fingerprint
243
+ def test_fingerprint
244
244
  puts Misc.fingerprint({:a => 1})
245
245
  end
246
246
 
@@ -253,4 +253,26 @@ class TestMisc < Test::Unit::TestCase
253
253
  end
254
254
  end
255
255
  end
256
+
257
+ def test_pipe
258
+ t = 5
259
+ stream = Misc.open_pipe do |sin|
260
+ t.times do |i|
261
+ puts "Calculating line #{ i }"
262
+ sleep 0.5
263
+ sin.puts "LINE #{ i }"
264
+ end
265
+ end
266
+
267
+ time = Time.now
268
+ lines = []
269
+ while line = stream.gets
270
+ lines << line.strip
271
+ end
272
+ time_spent = Time.new - time
273
+
274
+ assert time_spent >= t * 0.5
275
+ assert time_spent <= (t+1) * 0.5
276
+ assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
277
+ end
256
278
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.6.0
4
+ version: 5.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-10 00:00:00.000000000 Z
11
+ date: 2014-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake