RubyGems - rbbt-util - Versions diffs - 5.6.0 → 5.6.1 - Mend

rbbt-util 5.6.0 → 5.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +4 -4
data/lib/rbbt/tsv/accessor.rb +2 -4
data/lib/rbbt/tsv/attach.rb +35 -10
data/lib/rbbt/tsv/parser.rb +1 -1
data/lib/rbbt/tsv/util.rb +47 -0
data/lib/rbbt/util/filecache.rb +32 -0
data/lib/rbbt/util/log.rb +4 -0
data/lib/rbbt/util/misc.rb +18 -0
data/lib/rbbt/util/simpleopt.rb +11 -1
data/lib/rbbt/workflow.rb +7 -1
data/lib/rbbt/workflow/accessor.rb +17 -7
data/lib/rbbt/workflow/step.rb +5 -1
data/share/rbbt_commands/workflow/task +50 -34
data/test/rbbt/association/test_index.rb +1 -1
data/test/rbbt/test_fix_width_table.rb +1 -1
data/test/rbbt/test_knowledge_base.rb +3 -44
data/test/rbbt/tsv/test_accessor.rb +1 -0
data/test/rbbt/tsv/test_attach.rb +12 -6
data/test/rbbt/util/test_misc.rb +55 -33
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b772991027bae39d5d75e7d19fdfc628b2a78871
-  data.tar.gz: aed0bd56fbd0178bff6cd37b386390a392a5a7ec
+  metadata.gz: 3656ab04698a077becda559ac837570042688fa8
+  data.tar.gz: f54aa65f719933df04767b54e896c74adb805e34
 SHA512:
-  metadata.gz: 6aeb956836ee4d77b643249dc63c13ba6aed3aa9568b58173eb872c45e384d5202ea8e742c55e775187371f4e5b438198d57f56896e3daad9041158320692677
-  data.tar.gz: eab7947a295e70759b50bf6333c589f7c71aa535d5c5c26f0c848aacb5b9f723505799bb4baec4c16f30b5fe3f9a5ed8715f3f6326612b33b342823622428497
+  metadata.gz: 929e4be6771df3db4f8507a9af317cb9bd841107d4170ec2ff198f1e1ed2aca0762e5b32851b1653336afdae49dc2729b0143fd65938cedd11660db2a5d687fc
+  data.tar.gz: 63ce2e0a7e12c9e6c6204a5d79764e9691ff781ae68e27586b135c524710ce557ce89a958aff4b7c101aa3cfd67897fdcb0ec415b5b09f315dad8a27ce437199

data/lib/rbbt/tsv/accessor.rb CHANGED Viewed

@@ -484,10 +484,8 @@ module TSV
     str = ""
-    str << "#: " << Misc.hash2string((ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
-    if fields
-      str << "#" << key_field << "\t" << fields * "\t" << "\n"
-    end
+    entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
+    str = TSV.header_lines(key_field, fields, entry_hash)
     with_unnamed do
       if keys.nil?

data/lib/rbbt/tsv/attach.rb CHANGED Viewed

@@ -3,7 +3,11 @@ require 'rbbt/tsv/attach/util'
 module TSV
   # Merge columns from different rows of a file
-  def self.merge_row_fields(input, output, sep = "\t")
+  def self.merge_row_fields(input, output, options = {})
+    options = Misc.add_defaults options, :sep => "\t"
+    key_field, fields = Misc.process_options options, :key_field, :fields
+    sep = options[:sep]
     is = case
          when (String === input and not input.index("\n") and input.length < 250 and File.exists?(input))
            CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
@@ -12,16 +16,25 @@ module TSV
          else
            input
          end
+    if key_field.nil? or fields.nil?
+      parser = TSV::Parser.new(is, options.dup)
+      fields ||= parser.fields
+      key_field ||= parser.key_field
+      line = parser.first_line
+    else
+      line = is.gets
+    end
     current_key  = nil
     current_parts = []
     done = false
     Open.write(output) do |os|
+      os.puts TSV.header_lines(key_field, fields, options)
-      done = is.eof?
-      while not done
-        key, *parts = is.gets.sub("\n",'').split(sep, -1)
+      while line
+        key, *parts = line.sub("\n",'').split(sep, -1)
         current_key ||= key
         case
         when key.nil?
@@ -39,7 +52,7 @@ module TSV
           current_parts = parts
         end
-        done = is.eof?
+        line = is.gets
       end
       os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
@@ -48,10 +61,14 @@ module TSV
   end
   # Merge two files with the same keys and different fields
-  def self.merge_different_fields(file1, file2, output, sep = "\t", monitor = false)
+  def self.merge_different_fields(file1, file2, output, options = {})
+    options = Misc.add_defaults options, :sep => "\t"
+    monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
+    sep = options[:sep] || "\t"
     case
     when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
-      size = CMD.cmd("wc -l '#{file1}'").read.to_f if monitor
+      size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
       file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
     when (String === file1 or StringIO === file1)
       size = file1.length if monitor
@@ -80,19 +97,27 @@ module TSV
     key1 = key2 = nil
     while key1.nil?
-      while (line1 = file1.gets) =~ /#/; end
+      while (line1 = file1.gets) =~ /^#/
+        key_field1, *fields1 = line1.strip.sub('#','').split(sep)
+      end
       key1, *parts1 = line1.sub("\n",'').split(sep, -1)
       cols1 = parts1.length
     end
     while key2.nil?
-      while (line2 = file2.gets) =~ /#/; end
+      while (line2 = file2.gets) =~ /^#/
+        key_field2, *fields2 = line2.strip.sub('#','').split(sep)
+      end
       key2, *parts2 = line2.sub("\n",'').split(sep, -1)
       cols2 = parts2.length
     end
     progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
+    entry_hash = options
+    entry_hash.delete :sep if entry_hash[:sep] == "\t"
+    output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
     key = key1 < key2 ? key1 : key2
     parts = [""] * (cols1 + cols2)
     while not (done1 and done2)
@@ -189,7 +214,7 @@ module TSV
   def merge_different_fields(other, options = {})
     TmpFile.with_file do |output|
-      TSV.merge_different_fields(self, other, output, options[:sep] || "\t")
+      TSV.merge_different_fields(self, other, output, options)
       tsv = TSV.open output, options
       tsv.key_field = self.key_field unless self.key_field.nil?
       tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?

data/lib/rbbt/tsv/parser.rb CHANGED Viewed

@@ -34,7 +34,7 @@ module TSV
       # Process fields line
-      if line and Misc.fixutf8(line) =~ /^#{@header_hash}/
+      while line and Misc.fixutf8(line) =~ /^#{@header_hash}/
         line.chomp!
         @fields = line.split(@sep)
         @key_field = @fields.shift

data/lib/rbbt/tsv/util.rb CHANGED Viewed

@@ -1,6 +1,40 @@
 require 'rbbt/resource/path'
 module TSV
+  def self.reorder_stream(stream, positions, sep = "\t")
+    Misc.open_pipe do |sin|
+      line = stream.gets
+      while line =~ /^#\:/
+        sin.puts line
+        line = stream.gets
+      end
+      while line  =~ /^#/
+        if Hash === positions
+          new = (0..line.split(sep).length-1).to_a
+          positions.each do |k,v|
+            new[k] = v
+            new[v] = k
+          end
+          positions = new
+        end
+        sin.puts "#" + line.sub!(/^#/,'').strip.split(sep).values_at(*positions).compact * sep
+        line = stream.gets
+      end
+      while line
+        if Hash === positions
+          new = (0..line.split(sep).length-1).to_a
+          positions.each do |k,v|
+            new[k] = v
+            new[v] = k
+          end
+          positions = new
+        end
+        sin.puts line.strip.split(sep).values_at(*positions) * sep
+        line = stream.gets
+      end
+    end
+  end
   def self.field_match_counts(file, values, options = {})
     options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
     persist_options = Misc.pull_keys options, :persist
@@ -52,6 +86,7 @@ module TSV
     when String === file
       Open.open(file, open_options)
     when file.respond_to?(:gets)
+      file.rewind if file.respond_to?(:rewind) and file.eof?
       file
     else
       raise "Cannot get stream from: #{file.inspect}"
@@ -76,6 +111,18 @@ module TSV
       Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
     end
   end
+  def self.header_lines(key_field, fields, entry_hash = {})
+    sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
+    str = ""
+    str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
+    if fields
+      str << "#" << key_field << sep << fields * sep << "\n"
+    end
+    str
+  end
   def identify_field(field)
     TSV.identify_field(key_field, fields, field)

data/lib/rbbt/util/filecache.rb CHANGED Viewed

@@ -55,4 +55,36 @@ module FileCache
     FileUtils.rm path if File.exist? path
   end
+  def self.cache_online_elements(ids, pattern = nil, &block)
+    ids = [ids] unless Array === ids
+    result_files = {}
+    missing = []
+    ids.each do |id|
+      filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
+      if FileCache.found(filename)
+        result_files[id] = FileCache.path(filename)
+      else
+        missing << id
+      end
+    end
+    yield(missing).each do |id, content|
+      filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
+      path = FileCache.path(filename)
+      Open.write(path, content)
+      result_files[id] = content
+    end
+    missing.each do |id|
+      filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
+      result = yield id
+      File.open{|f| f.write(path = FileCache.path(filename)) }
+      result_files[id] = path
+    end
+    result_files
+  end
 end

data/lib/rbbt/util/log.rb CHANGED Viewed

@@ -76,6 +76,10 @@ module Log
     log(message, ERROR, &block)
   end
+  def self.exception(e)
+    error(e.message)
+    error("BACKTRACE:\n" + e.backtrace * "\n")
+  end
   case ENV['RBBT_LOG']
   when 'DEBUG'

data/lib/rbbt/util/misc.rb CHANGED Viewed

@@ -1318,6 +1318,7 @@ end
   # Divides the array into +num+ chunks of the same size by placing one
   # element in each chunk iteratively.
   def self.divide(array, num)
+    num = 1 if num == 0
     chunks = []
     num.to_i.times do chunks << [] end
     array.each_with_index{|e, i|
@@ -1341,6 +1342,23 @@ end
     chunks
   end
+  def self.open_pipe
+    sout, sin = IO.pipe
+    raise "No block given" unless block_given?
+    Thread.new{
+      begin
+        yield sin
+      rescue
+        Log.exception $!
+        raise $!
+      ensure
+        sin.close
+      end
+    }
+    sout
+  end
   def self.zip_fields(array)
     return [] if array.empty?
     array[0].zip(*array[1..-1])

data/lib/rbbt/util/simpleopt.rb CHANGED Viewed

@@ -28,11 +28,21 @@ module SOPT
     @description ||= "Missing"
   end
   def self.shortcuts
     @shortcuts ||= []
   end
+  def self.delete_inputs(inputs)
+    inputs.each do |input|
+      input = input.to_s
+      self.shortcuts.delete self.input_shortcuts.delete(input)
+      self.inputs.delete input
+      self.input_types.delete input
+      self.input_defaults.delete input
+      self.input_descriptions.delete input
+    end
+  end
   def self.all
     @all ||= {}
   end

data/lib/rbbt/workflow.rb CHANGED Viewed

@@ -28,7 +28,7 @@ module Workflow
   def self.load_workflow_file(filename)
     begin
       $LOAD_PATH.unshift(File.join(File.dirname(File.expand_path(filename)), 'lib'))
-      require filename
+      require File.expand_path(filename)
       Log.debug{"Workflow loaded from: #{ filename }"}
       return true
     rescue Exception
@@ -99,6 +99,12 @@ module Workflow
     # Load locally
+    if wf_name =~ /::\w+$/
+      clean_name = wf_name.sub(/::.*/,'')
+      Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
+      wf_name = clean_name
+    end
     Log.info{"Loading workflow #{wf_name}"}
     require_local_workflow(wf_name) or
     require_local_workflow(Misc.snake_case(wf_name)) or

data/lib/rbbt/workflow/accessor.rb CHANGED Viewed

@@ -184,7 +184,18 @@ class Step
     end
   end
+  def provenance
+    provenance = {}
+    dependencies.each do |dep|
+      next unless dep.path.exists?
+      if File.exists? dep.info_file
+        provenance[dep.path] = dep.provenance if File.exists? dep.path
+      else
+        provenance[dep.path] = nil
+      end
+    end
+    {:inputs => info[:inputs], :provenance => provenance}
+  end
 end
 module Workflow
@@ -264,16 +275,15 @@ module Workflow
     [taskname].concat(rec_dependencies(taskname)).inject({}){|acc, tn| acc.merge tasks[tn.to_sym].input_options}
   end
   def real_dependencies(task, jobname, inputs, dependencies)
     real_dependencies = []
     dependencies.each do |dependency|
-      real_dependencies << case
-      when Step === dependency
+      real_dependencies << case dependency
+      when Step
         dependency
-      when Symbol === dependency
+      when Symbol
         job(dependency, jobname, inputs)
-      when Proc === dependency
+      when Proc
         dependency.call jobname, inputs
       end
     end
@@ -303,7 +313,6 @@ module Workflow
     }
   end
   def id_for(path)
     if workdir.respond_to? :find
       workdir_find = workdir.find
@@ -322,4 +331,5 @@ module Workflow
     Misc.path_relative_to(workdir_find, File.dirname(path)).sub(/([^\/]+)\/.*/,'\1')
   end
 end

data/lib/rbbt/workflow/step.rb CHANGED Viewed

@@ -281,7 +281,11 @@ class Step
   end
   def recursive_clean
-    rec_dependencies.each{|step| step.clean }
+    rec_dependencies.each do |step|
+      if File.exists?(step.info_file)
+        step.clean
+      end
+    end
     clean
   end

data/share/rbbt_commands/workflow/task CHANGED Viewed

@@ -6,7 +6,7 @@ require 'rbbt/workflow/usage'
 YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
-def usage(workflow = nil, task = nil, error = nil)
+def usage(workflow = nil, task = nil, error = nil, backtrace = nil)
   puts SOPT.doc
   puts "## WORKFLOW"
   puts
@@ -27,7 +27,8 @@ def usage(workflow = nil, task = nil, error = nil)
     workflow.doc(task)
     if error
         puts
-        puts "Error: " << error
+        puts Term::ANSIColor.red("Error: ") << error
+        puts Term::ANSIColor.red("Backtrace: ") << backtrace * "\n"
     end
   end
@@ -112,8 +113,10 @@ options = SOPT.get <<EOF
 -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
 -jn--jobname* Job name to use. The name 'Default' is used by default:
 -pn--printname Print the name of the job and exit without starting it:
+-jf--job_file* Output one of the job produced files:
 -wd--workdir* Change the working directory of the workflow:
 -O--output* Save job result into file:
+--provenance Report the jobs provenance:
 --fork Run job asyncronously:
 EOF
@@ -178,13 +181,13 @@ workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir))) if opt
 job = workflow.job(task.name, name, job_options)
 # clean job
-if clean and job.done? != false
+if clean
   job.clean
   sleep 1
   job = workflow.job(task.name, name, job_options)
 end
-if recursive_clean and job.done?
+if recursive_clean
   job.recursive_clean
   sleep 1
   job = workflow.job(task.name, name, job_options)
@@ -192,46 +195,59 @@ end
 # run
 begin
-if do_exec
-  res = job.exec
-  case
-  when Array === res
-    puts res * "\n"
-  when TSV === res
-    puts res
-  when Hash === res
-    puts res.to_yaml
+  if do_exec
+    res = job.exec
+    case
+    when Array === res
+      puts res * "\n"
+    when TSV === res
+      puts res
+    when Hash === res
+      puts res.to_yaml
+    else
+      puts res
+    end
+    exit 0
+  end
+  if do_fork
+    job.fork
+    while not job.done?
+      message = job.messages ? job.messages.last : "no message"
+      Log.debug "#{job.status}: #{message}"
+      sleep 2
+    end
+    raise job.messages.last if job.error?
+    res = job.load
   else
-    puts res
+    res = job.run(true)
   end
-  exit 0
-end
-if do_fork
-  job.fork
-  while not job.done?
-    message = job.messages ? job.messages.last : "no message"
-    Log.debug "#{job.status}: #{message}"
-    sleep 2
+  if options.delete(:provenance)
+    require 'pp'
+    pp job.provenance
+    exit 0
   end
-  raise job.messages.last if job.error?
-  res = job.load
-else
-  res = job.run(true)
-end
-if options.delete(:printname)
-  puts job.name
-  exit 0
-else
-  Log.low "Job name: #{job.name}"
-end
+  if options.delete(:printname)
+    puts job.name
+    exit 0
+  else
+    Log.low "Job name: #{job.name}"
+  end
 rescue ParameterException
-    usage(workflow, task, $!.message)
+  SOPT.delete_inputs(workflow.rec_inputs(task.name))
+  usage(workflow, task, $!.message, $!.backtrace)
 end
 out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
+if job_file = options.delete(:job_file)
+  out.puts res.file(job_file).read
+  exit 0
+end
 if Step === res
   out.puts Open.read(res.path) if File.exists? res.path
 else

data/test/rbbt/association/test_index.rb CHANGED Viewed

@@ -29,7 +29,7 @@ class TestAssociationIndex < Test::Unit::TestCase
     genes = tp53.pina_interactors.ensembl
     genes << tp53
-    Misc.benchmark(10) do
+    Misc.benchmark(100) do
       index.subset_entities("Ensembl Gene ID" => genes).length
     end

data/test/rbbt/test_fix_width_table.rb CHANGED Viewed

@@ -76,8 +76,8 @@ g 25
   def test_range
     data =<<-EOF
+##012345678901234567890
 #ID:Range
-#:012345678901234567890
 a:   ______
 b: ______
 c:    _______

data/test/rbbt/test_knowledge_base.rb CHANGED Viewed

@@ -55,53 +55,12 @@ class TestKnowledgeBase < Test::Unit::TestCase
     assert @kb.all_databases.include? "pina"
   end
-  def test_items
-    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
-    kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
-    kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
-  end
-  def __test_subset
+  def test_subset
     gene = "TP53"
     found = Genomics.knowledge_base.identify :pina, gene
-    p53_interactors = Misc.profile{ Genomics.knowledge_base.children(:pina, found).target_entity }
+    p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
-    Misc.profile do
-      puts Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).length
-    end
-    ddd 2
-    #assert Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).target_entities.name.include? "MDM2"
-  end
-  def test_benchmark
-    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
-    kb = KnowledgeBase.new Rbbt.tmp.test.kb2
-    kb.namespace = "Hsa/jan2013"
-      require 'rbbt/sources/COSMIC'
-      require 'rbbt/entity/genomic_mutation'
-      mutations = tp53.COSMIC_mutations
-      Misc.benchmark(10) do
-        name = "mutations"
-        kb.add_index name, "Ensembl Gene ID", "Genomic Mutation", "Change"
-        kb.write name do
-          mutations.each do |gm|
-            kb.add name, tp53, gm, gm.base
-          end
-        end
-      end
-  end
-  def test_benchmark2
-    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
-    kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
-    kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
-    l = nil
-    Misc.benchmark(1000) do
-     l = tp53.transcripts.length
-    end
-    assert l > 0
+    assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
   end
   def test_syndication

data/test/rbbt/tsv/test_accessor.rb CHANGED Viewed

@@ -57,6 +57,7 @@ row2    A    B    Id3
     TmpFile.with_file(content) do |filename|
       tsv = TSV.open(filename, :sep => /\s+/)
+      puts tsv.to_s
       assert tsv.to_s =~ /row1\ta|aa|aaa/
       assert tsv.to_s =~ /:type=:double/
     end

data/test/rbbt/tsv/test_attach.rb CHANGED Viewed

@@ -87,7 +87,6 @@ B    Id3
     tsv1.attach_source_key tsv2, "ValueB"
     assert_equal %w(ValueA ValueB OtherID), tsv1.fields
     assert_equal "Id1", tsv1["row1"]["OtherID"]
   end
@@ -290,7 +289,7 @@ row6 dd dd ee
     EOF
     TmpFile.with_file do |f|
-      TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, " ")
+      TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, :sep => " ")
       assert_equal result, Open.read(f)
     end
   end
@@ -327,6 +326,7 @@ row6 dd dd ee
   def test_merge_different_rows_split_lines
     file1 =<<-EOF
+#ID,letterA,letterB,letterC
 row6,dd,dd,ee
 row1,a,b,c
 row1,aa,bb,cc
@@ -334,6 +334,7 @@ row2,A,B,C
 row3,1,2,3
    EOF
     file2 =<<-EOF
+#ID,letterD,letterE
 row20,rr,rr
 row1,d,e
 row2,D,E
@@ -342,6 +343,8 @@ row4,x,y
     # Might be slightly different ...
     result1 =<<-EOF
+#: :sep=,
+#ID,letterA,letterB,letterC,letterD,letterE
 row1,aa|a,bb|b,cc|c,d,e
 row2,A,B,C,D,E
 row20,,,,rr,rr
@@ -350,6 +353,8 @@ row4,,,,x,y
 row6,dd,dd,ee,,
     EOF
     result2 =<<-EOF
+#: :sep=,
+#ID,letterA,letterB,letterC,letterD,letterE
 row1,a|aa,b|bb,c|cc,d,e
 row2,A,B,C,D,E
 row20,,,,rr,rr
@@ -359,8 +364,7 @@ row6,dd,dd,ee,,
     EOF
     TmpFile.with_file do |f|
-      TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, ','
+      TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, :sep => ','
       # ... so check for either
       assert(Open.read(f) == result1 || Open.read(f) == result2)
     end
@@ -403,6 +407,7 @@ row6,dd,dd,ee,,
   def test_merge_rows
     file1 =<<-EOF
+#ID,letterA,letterB,letterC
 row1,a,b,c
 row1,aa,bb,cc
 row2,A,B,C
@@ -410,8 +415,9 @@ row3,1,2,3
     EOF
     TmpFile.with_file(file1) do |input|
       TmpFile.with_file() do |output|
-        TSV.merge_row_fields Open.open(input), output
-        assert Open.read(output) =~ /a|aa/
+        TSV.merge_row_fields Open.open(input), output, :sep => ','
+        assert Open.read(output) =~ /^#ID,letterA,letterB,letterC$/
+        assert Open.read(output).index "a|aa"
       end
     end

data/test/rbbt/util/test_misc.rb CHANGED Viewed

@@ -6,11 +6,11 @@ require 'rbbt/entity'
 class TestMisc < Test::Unit::TestCase
-  def _test_humanize
+  def test_humanize
     assert_equal "mutation_enrichment", Misc.humanize("MutationEnrichment")
   end
-  def _test_fixutf8
+  def test_fixutf8
     string = "abc\xffdef"
     string = string.force_encoding("UTF-8") if string.respond_to? :force_encoding
     assert(! string.valid_encoding?) if string.respond_to? :valid_encoding?
@@ -19,37 +19,37 @@ class TestMisc < Test::Unit::TestCase
     assert( Misc.fixutf8(string).valid_encoding) if string.respond_to? :valid_encoding
   end
-  def _test_colors_for
+  def test_colors_for
     colors, used = Misc.colors_for([1,2,2,1,2,1,2,2,3,3,2,3,2])
     assert_equal Misc::COLOR_LIST[1], used[2]
   end
-  def _test_total_length
+  def test_total_length
     ranges = [(0..100), (50..150), (120..160)]
     ranges = [(0..100), (50..150), (120..160), (51..70)]
     assert_equal 161, Misc.total_length(ranges)
   end
-  def _test_id_filename?
+  def test_id_filename?
     TmpFile.with_file("") do |file|
       assert Misc.is_filename?(file)
       assert ! Misc.is_filename?("TEST STRING")
     end
   end
-  def _test_merge_sorted_arrays
+  def test_merge_sorted_arrays
     assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
   end
-  def _test_intersect_sorted_arrays
+  def test_intersect_sorted_arrays
     assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
   end
-  def _test_sorted_array_matches
+  def test_sorted_array_matches
     assert_equal [1,3], Misc.sorted_array_hits(%w(a b c d e), %w(b d))
   end
-  def _test_binary_include?
+  def test_binary_include?
     a = %w(a b c d e).sort
     assert Misc.binary_include?(a, "a")
     assert(!Misc.binary_include?(a, "z"))
@@ -58,24 +58,24 @@ class TestMisc < Test::Unit::TestCase
     assert(Misc.binary_include?(a, "d"))
   end
-  def _test_process_to_hash
+  def test_process_to_hash
     list = [1,2,3,4]
     assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
   end
-#  def _test_pdf2text_example
+#  def test_pdf2text_example
 #    assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
 #  end
 #
-#  def _test_pdf2text_EPAR
+#  def test_pdf2text_EPAR
 #    assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
 #  end
 #
-#  def _test_pdf2text_wrong
+#  def test_pdf2text_wrong
 #    assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
 #  end
-  def _test_string2hash
+  def test_string2hash
     assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
     assert_equal(true, Misc.string2hash(":true")[:true])
     assert_equal(true, Misc.string2hash("true")["true"])
@@ -93,17 +93,17 @@ class TestMisc < Test::Unit::TestCase
     end
   end
-  def _test_named_array
+  def test_named_array
     a = NamedArray.setup([1,2,3,4], %w(a b c d))
     assert_equal(1, a['a'])
   end
-#  def _test_path_relative_to
+#  def test_path_relative_to
 #    assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
 #  end
-#  def _test_chunk
-#    _test =<<-EOF
+#  def test_chunk
+#    test =<<-EOF
 #This is an example file. Entries are separated by Entry
 #-- Entry
 #1
@@ -118,7 +118,7 @@ class TestMisc < Test::Unit::TestCase
 #    assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
 #  end
-  def _test_hash2string
+  def test_hash2string
     hash = {}
     assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
@@ -136,14 +136,14 @@ class TestMisc < Test::Unit::TestCase
   end
-  def _test_merge
+  def test_merge
     a = [[1],[2]]
     a = NamedArray.setup a, %w(1 2)
     a.merge [3,4]
     assert_equal [1,3], a[0]
   end
-  def _test_indiferent_hash
+  def test_indiferent_hash
     a = {:a => 1, "b" => 2}
     a.extend IndiferentHash
@@ -153,7 +153,7 @@ class TestMisc < Test::Unit::TestCase
     assert_equal 2, a[:b]
   end
-  def _test_lockfile
+  def test_lockfile
     TmpFile.with_file do |tmpfile|
       pids = []
@@ -177,7 +177,7 @@ class TestMisc < Test::Unit::TestCase
     end
   end
-  def _test_positions2hash
+  def test_positions2hash
     inputs = Misc.positional2hash([:one, :two, :three], 1, :two => 2, :four => 4)
     assert_equal 1, inputs[:one]
     assert_equal 2, inputs[:two]
@@ -185,7 +185,7 @@ class TestMisc < Test::Unit::TestCase
     assert_equal nil, inputs[:four]
   end
-  def _test_mean
+  def test_mean
     assert_equal 2, Misc.mean([1,2,3])
     assert_equal 3, Misc.mean([1,2,3,4,5])
   end
@@ -194,32 +194,32 @@ class TestMisc < Test::Unit::TestCase
     assert_equal Math.sqrt(2), Misc.sd([1,3])
   end
-  def _test_align_small
+  def test_align_small
     reference = "AABCDEBD"
     sequence  = "ABCD"
     assert_equal '-ABCD---', Misc.fast_align(reference, sequence).last
   end
-  def _test_align_real
+  def test_align_real
     reference = "SGNECNKAIDGNKDTFWHTFYGANGDPKPPPHTYTIDMKTTQNVNGLSMLPRQDGNQNGWIGRHEVYLSSDGTNW"
     sequence  = "TYTIDMKTTQNVNGLSML"
     assert_equal "--------------------------------TYTIDMKTTQNVNGLSML-------------------------", Misc.fast_align(reference, sequence).last
   end
-  def _test_divide
+  def test_divide
     assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
   end
-  def _test_ordered_divide
+  def test_ordered_divide
     assert_equal 5, Misc.ordered_divide(%w(1 2 3 4 5 6 7 8 9),2).length
   end
-  def _test_collapse_ranges
+  def test_collapse_ranges
     ranges = [(0..100), (50..150), (51..61),(200..250), (300..324),(320..350)]
     assert_equal [(0..150),(200..250), (300..350)], Misc.collapse_ranges(ranges)
   end
-  def _test_humanize
+  def test_humanize
     str1 = "test_string"
     str2 = "TEST_string"
     str3 = "test"
@@ -229,18 +229,18 @@ class TestMisc < Test::Unit::TestCase
     assert_equal "Test", Misc.humanize(str3)
   end
-  def _test_snake_case
+  def test_snake_case
     str1 = "ACRONIMTest"
     str2 = "ACRONIM_test"
     assert_equal "ACRONIM_test", Misc.snake_case(str1)
     assert_equal "ACRONIM_test", Misc.snake_case(str2)
   end
-  def _test_correct_vcf_mutations
+  def test_correct_vcf_mutations
     assert_equal [737407, ["-----", "-----G", "-----GTTAAT"]], Misc.correct_vcf_mutation(737406, "GTTAAT", "G,GG,GGTTAAT")
   end
-  def _test_fingerprint
+  def test_fingerprint
     puts Misc.fingerprint({:a => 1})
   end
@@ -253,4 +253,26 @@ class TestMisc < Test::Unit::TestCase
       end
     end
   end
+  def test_pipe
+    t = 5
+    stream = Misc.open_pipe do |sin|
+      t.times do |i|
+        puts "Calculating line #{ i }"
+        sleep 0.5
+        sin.puts "LINE #{ i }"
+      end
+    end
+    time = Time.now
+    lines = []
+    while line = stream.gets
+      lines << line.strip
+    end
+    time_spent = Time.new - time
+    assert time_spent >= t * 0.5
+    assert time_spent <= (t+1) * 0.5
+    assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-util
 version: !ruby/object:Gem::Version
-  version: 5.6.0
+  version: 5.6.1
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-10 00:00:00.000000000 Z
+date: 2014-02-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake