rbbt-sources 2.1.5 → 2.1.7
This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +5 -13
 - data/lib/rbbt/sources/entrez.rb +28 -68
 - data/lib/rbbt/sources/organism.rb +6 -0
 - data/lib/rbbt/sources/uniprot.rb +75 -15
 - data/test/rbbt/sources/test_entrez.rb +2 -11
 - data/test/rbbt/sources/test_organism.rb +12 -12
 - metadata +21 -24
 - data/lib/rbbt/sources/COSMIC.rb +0 -153
 - data/lib/rbbt/sources/dbSNP.rb +0 -194
 - data/lib/rbbt/sources/genomes1000.rb +0 -109
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,15 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
              data.tar.gz: !binary |-
         
     | 
| 
       6 
     | 
    
         
            -
                YWJlYTE4Y2M2YWM0ZjIxYTAxZTE4ZjExZmExNjQwYTJjNTg3NGVmZg==
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
         
     | 
| 
       7 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       8 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
                ZTM0MjA5ZWFmZjlkMzAzMmRjYTBhOGVhMjM4Y2JhMmM2OThjMjQ1MDRkY2Vi
         
     | 
| 
       11 
     | 
    
         
            -
                YTFiOWYyNmEwMGZmMzg5MDFiNjQwMWNlNDVhODEwM2VjNTg0MTc=
         
     | 
| 
       12 
     | 
    
         
            -
              data.tar.gz: !binary |-
         
     | 
| 
       13 
     | 
    
         
            -
                ZTY3ZGFjM2E3ZmY0OThmZjZiNzI2OTAwNWNmZWZlYmI5ODRkMTEyY2IzODNm
         
     | 
| 
       14 
     | 
    
         
            -
                YmZkNjY3NTI2MjQzNjMzMTc4YjgzYjVkM2IwZjc0OTA0NWM0YzM1ZDUzMjU5
         
     | 
| 
       15 
     | 
    
         
            -
                ZTliYTNjZWY4YWMwMjUxMDFkMTRiMGRmNWRkNWQyNjBjYjgwYzE=
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
         
     | 
    
        data/lib/rbbt/sources/entrez.rb
    CHANGED
    
    | 
         @@ -1,6 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rbbt'
         
     | 
| 
      
 1 
     | 
    
         
            +
            require 'rbbt-util'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'rbbt/tsv'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'rbbt/resource'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'rbbt/util/filecache'
         
     | 
| 
       4 
5 
     | 
    
         
             
            require 'rbbt/bow/bow'
         
     | 
| 
       5 
6 
     | 
    
         
             
            require 'set'
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
         @@ -70,85 +71,44 @@ module Entrez 
     | 
|
| 
       70 
71 
     | 
    
         | 
| 
       71 
72 
     | 
    
         
             
              private 
         
     | 
| 
       72 
73 
     | 
    
         | 
| 
       73 
     | 
    
         
            -
              def self.get_online(geneids)
         
     | 
| 
       74 
74 
     | 
    
         | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
      
 75 
     | 
    
         
            +
              def self.get_gene(geneids)
         
     | 
| 
      
 76 
     | 
    
         
            +
                _array = Array === geneids
         
     | 
| 
       76 
77 
     | 
    
         | 
| 
       77 
     | 
    
         
            -
                 
     | 
| 
       78 
     | 
    
         
            -
                 
     | 
| 
       79 
     | 
    
         
            -
                  begin
         
     | 
| 
       80 
     | 
    
         
            -
                    Misc.try3times do
         
     | 
| 
       81 
     | 
    
         
            -
                      url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{geneids_list * ","}" 
         
     | 
| 
      
 78 
     | 
    
         
            +
                geneids = [geneids] unless Array === geneids
         
     | 
| 
      
 79 
     | 
    
         
            +
                geneids = geneids.compact.collect{|id| id}
         
     | 
| 
       82 
80 
     | 
    
         | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
      
 81 
     | 
    
         
            +
                result_files = FileCache.cache_online_elements(geneids, 'gene-{ID}.xml') do |ids|
         
     | 
| 
      
 82 
     | 
    
         
            +
                  result = {}
         
     | 
| 
      
 83 
     | 
    
         
            +
                  values = []
         
     | 
| 
      
 84 
     | 
    
         
            +
                  Misc.divide(ids, (ids.length / 100) + 1).each do |list|
         
     | 
| 
      
 85 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 86 
     | 
    
         
            +
                      Misc.try3times do
         
     | 
| 
      
 87 
     | 
    
         
            +
                        url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{list * ","}" 
         
     | 
| 
       84 
88 
     | 
    
         | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
      
 89 
     | 
    
         
            +
                        xml = Open.read(url, :wget_options => {:quiet => true}, :nocache => true)
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                        values += xml.scan(/(<Entrezgene>.*?<\/Entrezgene>)/sm).flatten
         
     | 
| 
      
 92 
     | 
    
         
            +
                      end
         
     | 
| 
      
 93 
     | 
    
         
            +
                    rescue
         
     | 
| 
      
 94 
     | 
    
         
            +
                      Log.error $!.message
         
     | 
| 
       86 
95 
     | 
    
         
             
                    end
         
     | 
| 
       87 
     | 
    
         
            -
                  rescue
         
     | 
| 
       88 
     | 
    
         
            -
                    puts $!.message
         
     | 
| 
       89 
     | 
    
         
            -
                    genes += geneids_list.collect{|g| nil}
         
     | 
| 
       90 
96 
     | 
    
         
             
                  end
         
     | 
| 
       91 
     | 
    
         
            -
                end
         
     | 
| 
       92 
97 
     | 
    
         | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                  list = Hash[*genes_complete.zip([nil]).flatten]
         
     | 
| 
       95 
     | 
    
         
            -
                  genes.each{|gene|
         
     | 
| 
      
 98 
     | 
    
         
            +
                  values.each do |xml|
         
     | 
| 
       96 
99 
     | 
    
         
             
                    geneid = gene.match(/<Gene-track_geneid>(\d+)/)[1]
         
     | 
| 
       97 
     | 
    
         
            -
                     
     | 
| 
       98 
     | 
    
         
            -
                     
     | 
| 
       99 
     | 
    
         
            -
                   
     | 
| 
       100 
     | 
    
         
            -
                  return list
         
     | 
| 
       101 
     | 
    
         
            -
                else
         
     | 
| 
       102 
     | 
    
         
            -
                  return genes.first
         
     | 
| 
      
 100 
     | 
    
         
            +
                    
         
     | 
| 
      
 101 
     | 
    
         
            +
                    result[geneid] = xml
         
     | 
| 
      
 102 
     | 
    
         
            +
                  end
         
     | 
| 
       103 
103 
     | 
    
         
             
                end
         
     | 
| 
       104 
     | 
    
         
            -
              end
         
     | 
| 
       105 
104 
     | 
    
         | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
      
 105 
     | 
    
         
            +
                genes = {}
         
     | 
| 
      
 106 
     | 
    
         
            +
                geneids.each{|id| genes[id] = Gene.new(Open.read(result_files[id])) }
         
     | 
| 
       107 
107 
     | 
    
         | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
              end
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
              def self.get_gene(geneid)
         
     | 
| 
       113 
     | 
    
         
            -
                return nil if geneid.nil?
         
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
                if Array === geneid
         
     | 
| 
       116 
     | 
    
         
            -
                  missing = []
         
     | 
| 
       117 
     | 
    
         
            -
                  list = {}
         
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
       119 
     | 
    
         
            -
                  geneid.each{|p|
         
     | 
| 
       120 
     | 
    
         
            -
                    next if p.nil?
         
     | 
| 
       121 
     | 
    
         
            -
                    if FileCache.found(gene_filename p)
         
     | 
| 
       122 
     | 
    
         
            -
                      list[p] = Gene.new(Open.read(FileCache.path(gene_filename p)))
         
     | 
| 
       123 
     | 
    
         
            -
                    else
         
     | 
| 
       124 
     | 
    
         
            -
                      missing << p 
         
     | 
| 
       125 
     | 
    
         
            -
                    end
         
     | 
| 
       126 
     | 
    
         
            -
                  }
         
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
             
     | 
| 
       129 
     | 
    
         
            -
                  return list unless missing.any?
         
     | 
| 
       130 
     | 
    
         
            -
                  genes = get_online(missing)
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
                  genes.each{|p, xml|
         
     | 
| 
       133 
     | 
    
         
            -
                    filename = gene_filename p    
         
     | 
| 
       134 
     | 
    
         
            -
                    FileCache.add(filename,xml) unless FileCache.found(filename)
         
     | 
| 
       135 
     | 
    
         
            -
                    list[p] =  Gene.new(xml)
         
     | 
| 
       136 
     | 
    
         
            -
                  }
         
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
                  return list
         
     | 
| 
      
 108 
     | 
    
         
            +
                if _array
         
     | 
| 
      
 109 
     | 
    
         
            +
                  genes
         
     | 
| 
       139 
110 
     | 
    
         
             
                else
         
     | 
| 
       140 
     | 
    
         
            -
                   
     | 
| 
       141 
     | 
    
         
            -
             
     | 
| 
       142 
     | 
    
         
            -
             
     | 
| 
       143 
     | 
    
         
            -
                  if FileCache.found(filename)
         
     | 
| 
       144 
     | 
    
         
            -
                    return Gene.new(Open.read(FileCache.path(filename)))
         
     | 
| 
       145 
     | 
    
         
            -
                  else
         
     | 
| 
       146 
     | 
    
         
            -
                    xml = get_online(geneid)
         
     | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
       148 
     | 
    
         
            -
                    FileCache.add(filename, xml) unless FileCache.found(filename)
         
     | 
| 
       149 
     | 
    
         
            -
             
     | 
| 
       150 
     | 
    
         
            -
                    return Gene.new(xml)
         
     | 
| 
       151 
     | 
    
         
            -
                  end
         
     | 
| 
      
 111 
     | 
    
         
            +
                  genes.values.first
         
     | 
| 
       152 
112 
     | 
    
         
             
                end
         
     | 
| 
       153 
113 
     | 
    
         
             
              end
         
     | 
| 
       154 
114 
     | 
    
         | 
| 
         @@ -144,6 +144,12 @@ module Organism 
     | 
|
| 
       144 
144 
     | 
    
         
             
                }.first
         
     | 
| 
       145 
145 
     | 
    
         
             
              end
         
     | 
| 
       146 
146 
     | 
    
         | 
| 
      
 147 
     | 
    
         
            +
              def self.organism_code(name)
         
     | 
| 
      
 148 
     | 
    
         
            +
                organisms.select{|organism|
         
     | 
| 
      
 149 
     | 
    
         
            +
                  organism == name or Organism.scientific_name(organism) =~ /#{ name }/i
         
     | 
| 
      
 150 
     | 
    
         
            +
                }.first
         
     | 
| 
      
 151 
     | 
    
         
            +
              end
         
     | 
| 
      
 152 
     | 
    
         
            +
             
     | 
| 
       147 
153 
     | 
    
         
             
              def self.known_ids(name)
         
     | 
| 
       148 
154 
     | 
    
         
             
                TSV::Parser.new(Organism.identifiers(name).open).all_fields
         
     | 
| 
       149 
155 
     | 
    
         
             
              end
         
     | 
    
        data/lib/rbbt/sources/uniprot.rb
    CHANGED
    
    | 
         @@ -1,5 +1,6 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rbbt'
         
     | 
| 
      
 1 
     | 
    
         
            +
            require 'rbbt-util'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'rbbt/util/open'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'rbbt/util/filecache'
         
     | 
| 
       3 
4 
     | 
    
         
             
            require 'rbbt/resource'
         
     | 
| 
       4 
5 
     | 
    
         
             
            require 'rbbt/sources/cath'
         
     | 
| 
       5 
6 
     | 
    
         
             
            require 'rbbt/sources/uniprot'
         
     | 
| 
         @@ -32,12 +33,78 @@ module UniProt 
     | 
|
| 
       32 
33 
     | 
    
         
             
                tsv.to_s
         
     | 
| 
       33 
34 
     | 
    
         
             
              end
         
     | 
| 
       34 
35 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
36 
     | 
    
         
             
              UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
         
     | 
| 
       37 
37 
     | 
    
         
             
              UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta"
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
              def self.get_uniprot_entry(uniprotids)
         
     | 
| 
      
 40 
     | 
    
         
            +
                _array = Array === uniprotids
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                uniprotids = [uniprotids] unless Array === uniprotids
         
     | 
| 
      
 43 
     | 
    
         
            +
                uniprotids = uniprotids.compact.collect{|id| id}
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
                result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-{ID}.xml') do |ids|
         
     | 
| 
      
 46 
     | 
    
         
            +
                  result = {}
         
     | 
| 
      
 47 
     | 
    
         
            +
                  ids.each do |id|
         
     | 
| 
      
 48 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 49 
     | 
    
         
            +
                      Misc.try3times do
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
                        content = Open.read(UNIPROT_TEXT.sub("[PROTEIN]", id), :wget_options => {:quiet => true}, :nocache => true)
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                        result[id] = content
         
     | 
| 
      
 54 
     | 
    
         
            +
                      end
         
     | 
| 
      
 55 
     | 
    
         
            +
                    rescue
         
     | 
| 
      
 56 
     | 
    
         
            +
                      Log.error $!.message
         
     | 
| 
      
 57 
     | 
    
         
            +
                    end
         
     | 
| 
      
 58 
     | 
    
         
            +
                  end
         
     | 
| 
      
 59 
     | 
    
         
            +
                  result
         
     | 
| 
      
 60 
     | 
    
         
            +
                end
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                uniprots = {}
         
     | 
| 
      
 63 
     | 
    
         
            +
                uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                if _array
         
     | 
| 
      
 66 
     | 
    
         
            +
                  uniprots
         
     | 
| 
      
 67 
     | 
    
         
            +
                else
         
     | 
| 
      
 68 
     | 
    
         
            +
                  uniprots.values.first
         
     | 
| 
      
 69 
     | 
    
         
            +
                end
         
     | 
| 
      
 70 
     | 
    
         
            +
              end
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
              def self.get_uniprot_sequence(uniprotids)
         
     | 
| 
      
 73 
     | 
    
         
            +
                _array = Array === uniprotids
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
                uniprotids = [uniprotids] unless Array === uniprotids
         
     | 
| 
      
 76 
     | 
    
         
            +
                uniprotids = uniprotids.compact.collect{|id| id}
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
                result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids|
         
     | 
| 
      
 79 
     | 
    
         
            +
                  result = {}
         
     | 
| 
      
 80 
     | 
    
         
            +
                  ids.each do |id|
         
     | 
| 
      
 81 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 82 
     | 
    
         
            +
                      Misc.try3times do
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                        url = UNIPROT_FASTA.sub "[PROTEIN]", id
         
     | 
| 
      
 85 
     | 
    
         
            +
                        text = Open.read(url, :nocache => true)
         
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
      
 87 
     | 
    
         
            +
                        result[id] = text.split(/\n/).select{|line| line !~ /^>/} * ""
         
     | 
| 
      
 88 
     | 
    
         
            +
                      end
         
     | 
| 
      
 89 
     | 
    
         
            +
                    rescue
         
     | 
| 
      
 90 
     | 
    
         
            +
                      Log.error $!.message
         
     | 
| 
      
 91 
     | 
    
         
            +
                    end
         
     | 
| 
      
 92 
     | 
    
         
            +
                  end
         
     | 
| 
      
 93 
     | 
    
         
            +
                  result
         
     | 
| 
      
 94 
     | 
    
         
            +
                end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                uniprots = {}
         
     | 
| 
      
 97 
     | 
    
         
            +
                uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) }
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
                if _array
         
     | 
| 
      
 100 
     | 
    
         
            +
                  uniprots
         
     | 
| 
      
 101 
     | 
    
         
            +
                else
         
     | 
| 
      
 102 
     | 
    
         
            +
                  uniprots.values.first
         
     | 
| 
      
 103 
     | 
    
         
            +
                end
         
     | 
| 
      
 104 
     | 
    
         
            +
              end
         
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
       38 
106 
     | 
    
         
             
              def self.pdbs(protein)
         
     | 
| 
       39 
     | 
    
         
            -
                 
     | 
| 
       40 
     | 
    
         
            -
                text = Open.read(url)
         
     | 
| 
      
 107 
     | 
    
         
            +
                text = get_uniprot_entry(protein)
         
     | 
| 
       41 
108 
     | 
    
         | 
| 
       42 
109 
     | 
    
         
             
                pdb = {}
         
     | 
| 
       43 
110 
     | 
    
         | 
| 
         @@ -59,15 +126,11 @@ module UniProt 
     | 
|
| 
       59 
126 
     | 
    
         
             
              end
         
     | 
| 
       60 
127 
     | 
    
         | 
| 
       61 
128 
     | 
    
         
             
              def self.sequence(protein)
         
     | 
| 
       62 
     | 
    
         
            -
                 
     | 
| 
       63 
     | 
    
         
            -
                text = Open.read(url)
         
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                text.split(/\n/).select{|line| line !~ /^>/} * ""
         
     | 
| 
      
 129 
     | 
    
         
            +
                get_uniprot_sequence(protein)
         
     | 
| 
       66 
130 
     | 
    
         
             
              end
         
     | 
| 
       67 
131 
     | 
    
         | 
| 
       68 
132 
     | 
    
         
             
              def self.features(protein)
         
     | 
| 
       69 
     | 
    
         
            -
                 
     | 
| 
       70 
     | 
    
         
            -
                text = Open.read(url)
         
     | 
| 
      
 133 
     | 
    
         
            +
                text = get_uniprot_entry(protein)
         
     | 
| 
       71 
134 
     | 
    
         | 
| 
       72 
135 
     | 
    
         
             
                text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
         
     | 
| 
       73 
136 
     | 
    
         | 
| 
         @@ -78,7 +141,6 @@ module UniProt 
     | 
|
| 
       78 
141 
     | 
    
         | 
| 
       79 
142 
     | 
    
         
             
                type = nil
         
     | 
| 
       80 
143 
     | 
    
         
             
                parts.each do |part|
         
     | 
| 
       81 
     | 
    
         
            -
                  parts
         
     | 
| 
       82 
144 
     | 
    
         
             
                  if part[0..1] == "FT"
         
     | 
| 
       83 
145 
     | 
    
         
             
                    type = part.gsub(/FT\s+/,'')
         
     | 
| 
       84 
146 
     | 
    
         
             
                    next
         
     | 
| 
         @@ -111,8 +173,7 @@ module UniProt 
     | 
|
| 
       111 
173 
     | 
    
         | 
| 
       112 
174 
     | 
    
         | 
| 
       113 
175 
     | 
    
         
             
              def self.variants(protein)
         
     | 
| 
       114 
     | 
    
         
            -
                 
     | 
| 
       115 
     | 
    
         
            -
                text = Open.read(url)
         
     | 
| 
      
 176 
     | 
    
         
            +
                text = get_uniprot_entry(protein)
         
     | 
| 
       116 
177 
     | 
    
         | 
| 
       117 
178 
     | 
    
         
             
                text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
         
     | 
| 
       118 
179 
     | 
    
         | 
| 
         @@ -157,8 +218,7 @@ module UniProt 
     | 
|
| 
       157 
218 
     | 
    
         
             
              end
         
     | 
| 
       158 
219 
     | 
    
         | 
| 
       159 
220 
     | 
    
         
             
              def self.cath(protein)
         
     | 
| 
       160 
     | 
    
         
            -
                 
     | 
| 
       161 
     | 
    
         
            -
                text = Open.read(url)
         
     | 
| 
      
 221 
     | 
    
         
            +
                text = get_uniprot_entry(protein)
         
     | 
| 
       162 
222 
     | 
    
         | 
| 
       163 
223 
     | 
    
         
             
                cath = {}
         
     | 
| 
       164 
224 
     | 
    
         
             
                text.split(/\n/).each{|l| 
         
     | 
| 
         @@ -21,21 +21,12 @@ class TestEntrez < Test::Unit::TestCase 
     | 
|
| 
       21 
21 
     | 
    
         
             
                assert(data['850320'].include? '1574125') 
         
     | 
| 
       22 
22 
     | 
    
         
             
              end
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
              def test_getonline
         
     | 
| 
       25 
     | 
    
         
            -
                geneids = 9129
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids))
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                geneids = [9129,9]
         
     | 
| 
       30 
     | 
    
         
            -
                assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids)[9129])
         
     | 
| 
       31 
     | 
    
         
            -
              end
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
24 
     | 
    
         
             
              def test_getgene
         
     | 
| 
       34 
25 
     | 
    
         
             
                geneids = 9129
         
     | 
| 
       35 
     | 
    
         
            -
                assert_equal([[" 
     | 
| 
      
 26 
     | 
    
         
            +
                assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids).description)
         
     | 
| 
       36 
27 
     | 
    
         | 
| 
       37 
28 
     | 
    
         
             
                geneids = [9129, 728049]
         
     | 
| 
       38 
     | 
    
         
            -
                assert_equal([[" 
     | 
| 
      
 29 
     | 
    
         
            +
                assert_equal([["pre-mRNA processing factor 3"]], Entrez.get_gene(geneids)[9129].description)
         
     | 
| 
       39 
30 
     | 
    
         
             
              end
         
     | 
| 
       40 
31 
     | 
    
         | 
| 
       41 
32 
     | 
    
         
             
              def test_similarity
         
     | 
| 
         @@ -5,37 +5,37 @@ require 'rbbt/sources/ensembl_ftp' 
     | 
|
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            class TestOrganism < Test::Unit::TestCase
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
              def  
     | 
| 
      
 8 
     | 
    
         
            +
              def _test_known_ids
         
     | 
| 
       9 
9 
     | 
    
         
             
                assert Organism.known_ids("Hsa").include?("Associated Gene Name")
         
     | 
| 
       10 
10 
     | 
    
         
             
              end
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
              def  
     | 
| 
      
 12 
     | 
    
         
            +
              def _test_location
         
     | 
| 
       13 
13 
     | 
    
         
             
                assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
         
     | 
| 
       14 
14 
     | 
    
         
             
              end
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
              def  
     | 
| 
      
 16 
     | 
    
         
            +
              def _test_identifiers
         
     | 
| 
       17 
17 
     | 
    
         
             
                assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
         
     | 
| 
       18 
18 
     | 
    
         
             
                assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
         
     | 
| 
       19 
19 
     | 
    
         
             
                assert Organism.identifiers("Sce").tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
         
     | 
| 
       20 
20 
     | 
    
         
             
              end
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
              def  
     | 
| 
      
 22 
     | 
    
         
            +
              def _test_lexicon
         
     | 
| 
       23 
23 
     | 
    
         
             
                assert TSV.open(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
         
     | 
| 
       24 
24 
     | 
    
         
             
              end
         
     | 
| 
       25 
25 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
              def  
     | 
| 
      
 26 
     | 
    
         
            +
              def _test_guess_id
         
     | 
| 
       27 
27 
     | 
    
         
             
                ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
         
     | 
| 
       28 
28 
     | 
    
         
             
                gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
         
     | 
| 
       29 
29 
     | 
    
         
             
                assert_equal "Associated Gene Name", Organism.guess_id("Sce", gene_name).first
         
     | 
| 
       30 
30 
     | 
    
         
             
                assert_equal "Ensembl Gene ID", Organism.guess_id("Sce", ensembl).first
         
     | 
| 
       31 
31 
     | 
    
         
             
              end
         
     | 
| 
       32 
32 
     | 
    
         | 
| 
       33 
     | 
    
         
            -
              def  
     | 
| 
      
 33 
     | 
    
         
            +
              def _test_organisms
         
     | 
| 
       34 
34 
     | 
    
         
             
                assert Organism.organisms.include? "Hsa"
         
     | 
| 
       35 
35 
     | 
    
         
             
                assert_equal "Hsa", Organism.organism("Homo sapiens")
         
     | 
| 
       36 
36 
     | 
    
         
             
              end
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
              def  
     | 
| 
      
 38 
     | 
    
         
            +
              def _test_attach_translations
         
     | 
| 
       39 
39 
     | 
    
         
             
                tsv = TSV.setup({"1020" => []}, :type => :list)
         
     | 
| 
       40 
40 
     | 
    
         
             
                tsv.key_field = "Entrez Gene ID"
         
     | 
| 
       41 
41 
     | 
    
         
             
                tsv.fields = []
         
     | 
| 
         @@ -47,7 +47,7 @@ class TestOrganism < Test::Unit::TestCase 
     | 
|
| 
       47 
47 
     | 
    
         
             
                assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
         
     | 
| 
       48 
48 
     | 
    
         
             
              end
         
     | 
| 
       49 
49 
     | 
    
         | 
| 
       50 
     | 
    
         
            -
              def  
     | 
| 
      
 50 
     | 
    
         
            +
              def _test_entrez_taxids
         
     | 
| 
       51 
51 
     | 
    
         
             
                assert_equal "Hsa", Organism.entrez_taxid_organism('9606')
         
     | 
| 
       52 
52 
     | 
    
         
             
              end
         
     | 
| 
       53 
53 
     | 
    
         | 
| 
         @@ -61,22 +61,22 @@ class TestOrganism < Test::Unit::TestCase 
     | 
|
| 
       61 
61 
     | 
    
         
             
                assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
         
     | 
| 
       62 
62 
     | 
    
         
             
              end
         
     | 
| 
       63 
63 
     | 
    
         | 
| 
       64 
     | 
    
         
            -
              def  
     | 
| 
      
 64 
     | 
    
         
            +
              def _test_orhtolog
         
     | 
| 
       65 
65 
     | 
    
         
             
                require 'rbbt/entity/gene'
         
     | 
| 
       66 
66 
     | 
    
         
             
                assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog("Hsa/jun2011")
         
     | 
| 
       67 
67 
     | 
    
         
             
              end
         
     | 
| 
       68 
68 
     | 
    
         | 
| 
       69 
     | 
    
         
            -
              #def  
     | 
| 
      
 69 
     | 
    
         
            +
              #def _test_genes_at_chromosome
         
     | 
| 
       70 
70 
     | 
    
         
             
              #  pos = [12, 117799500]
         
     | 
| 
       71 
71 
     | 
    
         
             
              #  assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
         
     | 
| 
       72 
72 
     | 
    
         
             
              #end
         
     | 
| 
       73 
73 
     | 
    
         | 
| 
       74 
     | 
    
         
            -
              #def  
     | 
| 
      
 74 
     | 
    
         
            +
              #def _test_genes_at_chromosome_array
         
     | 
| 
       75 
75 
     | 
    
         
             
              #  pos = [12, [117799500, 106903900]]
         
     | 
| 
       76 
76 
     | 
    
         
             
              #  assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
         
     | 
| 
       77 
77 
     | 
    
         
             
              #end
         
     | 
| 
       78 
78 
     | 
    
         | 
| 
       79 
     | 
    
         
            -
              #def  
     | 
| 
      
 79 
     | 
    
         
            +
              #def _test_genes_at_genomic_positions
         
     | 
| 
       80 
80 
     | 
    
         
             
              #  pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
         
     | 
| 
       81 
81 
     | 
    
         
             
              #  assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
         
     | 
| 
       82 
82 
     | 
    
         
             
              #end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,83 +1,83 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: rbbt-sources
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 2.1. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.1.7
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Miguel Vazquez
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date:  
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2014-02-21 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: rbbt-util
         
     | 
| 
       15 
15 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       16 
16 
     | 
    
         
             
                requirements:
         
     | 
| 
       17 
     | 
    
         
            -
                - -  
     | 
| 
      
 17 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       18 
18 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       19 
19 
     | 
    
         
             
                    version: 4.0.0
         
     | 
| 
       20 
20 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       21 
21 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       22 
22 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       23 
23 
     | 
    
         
             
                requirements:
         
     | 
| 
       24 
     | 
    
         
            -
                - -  
     | 
| 
      
 24 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       25 
25 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       26 
26 
     | 
    
         
             
                    version: 4.0.0
         
     | 
| 
       27 
27 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       28 
28 
     | 
    
         
             
              name: rbbt-text
         
     | 
| 
       29 
29 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       30 
30 
     | 
    
         
             
                requirements:
         
     | 
| 
       31 
     | 
    
         
            -
                - -  
     | 
| 
      
 31 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       32 
32 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       33 
33 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       34 
34 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       35 
35 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       36 
36 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       37 
37 
     | 
    
         
             
                requirements:
         
     | 
| 
       38 
     | 
    
         
            -
                - -  
     | 
| 
      
 38 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       39 
39 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       40 
40 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       41 
41 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       42 
     | 
    
         
            -
              name:  
     | 
| 
      
 42 
     | 
    
         
            +
              name: mechanize
         
     | 
| 
       43 
43 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       44 
44 
     | 
    
         
             
                requirements:
         
     | 
| 
       45 
     | 
    
         
            -
                - -  
     | 
| 
      
 45 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       46 
46 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       47 
47 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       48 
48 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       49 
49 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       50 
50 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       51 
51 
     | 
    
         
             
                requirements:
         
     | 
| 
       52 
     | 
    
         
            -
                - -  
     | 
| 
      
 52 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       53 
53 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       54 
54 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       55 
55 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       56 
     | 
    
         
            -
              name:  
     | 
| 
      
 56 
     | 
    
         
            +
              name: libxml-ruby
         
     | 
| 
       57 
57 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       58 
58 
     | 
    
         
             
                requirements:
         
     | 
| 
       59 
     | 
    
         
            -
                - -  
     | 
| 
      
 59 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       60 
60 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       61 
61 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       62 
62 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       63 
63 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       64 
64 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       65 
65 
     | 
    
         
             
                requirements:
         
     | 
| 
       66 
     | 
    
         
            -
                - -  
     | 
| 
      
 66 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       67 
67 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       68 
68 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       69 
69 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       70 
     | 
    
         
            -
              name:  
     | 
| 
      
 70 
     | 
    
         
            +
              name: bio
         
     | 
| 
       71 
71 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       72 
72 
     | 
    
         
             
                requirements:
         
     | 
| 
       73 
     | 
    
         
            -
                - -  
     | 
| 
      
 73 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       74 
74 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       75 
75 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       76 
76 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       77 
77 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       78 
78 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       79 
79 
     | 
    
         
             
                requirements:
         
     | 
| 
       80 
     | 
    
         
            -
                - -  
     | 
| 
      
 80 
     | 
    
         
            +
                - - ">="
         
     | 
| 
       81 
81 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       82 
82 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       83 
83 
     | 
    
         
             
            description: Data sources like PubMed, Entrez Gene, or Gene Ontology
         
     | 
| 
         @@ -88,7 +88,6 @@ extra_rdoc_files: [] 
     | 
|
| 
       88 
88 
     | 
    
         
             
            files:
         
     | 
| 
       89 
89 
     | 
    
         
             
            - etc/allowed_biomart_archives
         
     | 
| 
       90 
90 
     | 
    
         
             
            - etc/biomart/missing_in_archive
         
     | 
| 
       91 
     | 
    
         
            -
            - lib/rbbt/sources/COSMIC.rb
         
     | 
| 
       92 
91 
     | 
    
         
             
            - lib/rbbt/sources/COSTART.rb
         
     | 
| 
       93 
92 
     | 
    
         
             
            - lib/rbbt/sources/CTCAE.rb
         
     | 
| 
       94 
93 
     | 
    
         
             
            - lib/rbbt/sources/HPRD.rb
         
     | 
| 
         @@ -100,11 +99,9 @@ files: 
     | 
|
| 
       100 
99 
     | 
    
         
             
            - lib/rbbt/sources/bibtex.rb
         
     | 
| 
       101 
100 
     | 
    
         
             
            - lib/rbbt/sources/biomart.rb
         
     | 
| 
       102 
101 
     | 
    
         
             
            - lib/rbbt/sources/cath.rb
         
     | 
| 
       103 
     | 
    
         
            -
            - lib/rbbt/sources/dbSNP.rb
         
     | 
| 
       104 
102 
     | 
    
         
             
            - lib/rbbt/sources/ensembl.rb
         
     | 
| 
       105 
103 
     | 
    
         
             
            - lib/rbbt/sources/ensembl_ftp.rb
         
     | 
| 
       106 
104 
     | 
    
         
             
            - lib/rbbt/sources/entrez.rb
         
     | 
| 
       107 
     | 
    
         
            -
            - lib/rbbt/sources/genomes1000.rb
         
     | 
| 
       108 
105 
     | 
    
         
             
            - lib/rbbt/sources/go.rb
         
     | 
| 
       109 
106 
     | 
    
         
             
            - lib/rbbt/sources/gscholar.rb
         
     | 
| 
       110 
107 
     | 
    
         
             
            - lib/rbbt/sources/jochem.rb
         
     | 
| 
         @@ -143,25 +140,25 @@ require_paths: 
     | 
|
| 
       143 
140 
     | 
    
         
             
            - lib
         
     | 
| 
       144 
141 
     | 
    
         
             
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
       145 
142 
     | 
    
         
             
              requirements:
         
     | 
| 
       146 
     | 
    
         
            -
              - -  
     | 
| 
      
 143 
     | 
    
         
            +
              - - ">="
         
     | 
| 
       147 
144 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       148 
145 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       149 
146 
     | 
    
         
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
       150 
147 
     | 
    
         
             
              requirements:
         
     | 
| 
       151 
     | 
    
         
            -
              - -  
     | 
| 
      
 148 
     | 
    
         
            +
              - - ">="
         
     | 
| 
       152 
149 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       153 
150 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       154 
151 
     | 
    
         
             
            requirements: []
         
     | 
| 
       155 
152 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       156 
     | 
    
         
            -
            rubygems_version: 2.2. 
     | 
| 
      
 153 
     | 
    
         
            +
            rubygems_version: 2.2.1
         
     | 
| 
       157 
154 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       158 
155 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       159 
156 
     | 
    
         
             
            summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
         
     | 
| 
       160 
157 
     | 
    
         
             
            test_files:
         
     | 
| 
       161 
     | 
    
         
            -
            - test/rbbt/sources/ 
     | 
| 
       162 
     | 
    
         
            -
            - test/rbbt/sources/test_entrez.rb
         
     | 
| 
      
 158 
     | 
    
         
            +
            - test/rbbt/sources/test_pubmed.rb
         
     | 
| 
       163 
159 
     | 
    
         
             
            - test/rbbt/sources/test_biomart.rb
         
     | 
| 
       164 
160 
     | 
    
         
             
            - test/rbbt/sources/test_gscholar.rb
         
     | 
| 
      
 161 
     | 
    
         
            +
            - test/rbbt/sources/test_entrez.rb
         
     | 
| 
      
 162 
     | 
    
         
            +
            - test/rbbt/sources/test_go.rb
         
     | 
| 
       165 
163 
     | 
    
         
             
            - test/rbbt/sources/test_organism.rb
         
     | 
| 
       166 
     | 
    
         
            -
            - test/rbbt/sources/test_pubmed.rb
         
     | 
| 
       167 
164 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
    
        data/lib/rbbt/sources/COSMIC.rb
    DELETED
    
    | 
         @@ -1,153 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rbbt'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'rbbt/resource'
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            module COSMIC
         
     | 
| 
       5 
     | 
    
         
            -
              extend Resource
         
     | 
| 
       6 
     | 
    
         
            -
              self.subdir = "share/databases/COSMIC"
         
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
              COSMIC.claim COSMIC.mutations, :proc do 
         
     | 
| 
       9 
     | 
    
         
            -
                url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v67_241013.tsv.gz"
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
                stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
         
     | 
| 
       12 
     | 
    
         
            -
                tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
         
     | 
| 
       13 
     | 
    
         
            -
                tsv.fields = tsv.fields.collect{|f| f == "Gene name" ? "Associated Gene Name" : f}
         
     | 
| 
       14 
     | 
    
         
            -
                tsv.add_field "Genomic Mutation" do |mid, values|
         
     | 
| 
       15 
     | 
    
         
            -
                  position = values["Mutation GRCh37 genome position"]
         
     | 
| 
       16 
     | 
    
         
            -
                  cds = values["Mutation CDS"]
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                  if position.nil? or position.empty?
         
     | 
| 
       19 
     | 
    
         
            -
                    nil
         
     | 
| 
       20 
     | 
    
         
            -
                  else
         
     | 
| 
       21 
     | 
    
         
            -
                    position = position.split("-").first
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
                    chr, pos = position.split(":")
         
     | 
| 
       24 
     | 
    
         
            -
                    chr = "X" if chr == "23"
         
     | 
| 
       25 
     | 
    
         
            -
                    chr = "Y" if chr == "24"
         
     | 
| 
       26 
     | 
    
         
            -
                    chr = "M" if chr == "25"
         
     | 
| 
       27 
     | 
    
         
            -
                    position = [chr, pos ] * ":"
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                    if cds.nil?
         
     | 
| 
       30 
     | 
    
         
            -
                      position
         
     | 
| 
       31 
     | 
    
         
            -
                    else
         
     | 
| 
       32 
     | 
    
         
            -
                      change = case
         
     | 
| 
       33 
     | 
    
         
            -
                               when cds =~ />/
         
     | 
| 
       34 
     | 
    
         
            -
                                 cds.split(">").last
         
     | 
| 
       35 
     | 
    
         
            -
                               when cds =~ /del/
         
     | 
| 
       36 
     | 
    
         
            -
                                 deletion = cds.split("del").last
         
     | 
| 
       37 
     | 
    
         
            -
                                 case
         
     | 
| 
       38 
     | 
    
         
            -
                                 when deletion =~ /^\d+$/
         
     | 
| 
       39 
     | 
    
         
            -
                                   "-" * deletion.to_i 
         
     | 
| 
       40 
     | 
    
         
            -
                                 when deletion =~ /^[ACTG]+$/i
         
     | 
| 
       41 
     | 
    
         
            -
                                   "-" * deletion.length
         
     | 
| 
       42 
     | 
    
         
            -
                                 else
         
     | 
| 
       43 
     | 
    
         
            -
                                   Log.debug "Unknown deletion: #{ deletion }"
         
     | 
| 
       44 
     | 
    
         
            -
                                   deletion
         
     | 
| 
       45 
     | 
    
         
            -
                                 end
         
     | 
| 
       46 
     | 
    
         
            -
                               when cds =~ /ins/
         
     | 
| 
       47 
     | 
    
         
            -
                                 insertion = cds.split("ins").last
         
     | 
| 
       48 
     | 
    
         
            -
                                 case
         
     | 
| 
       49 
     | 
    
         
            -
                                 when insertion =~ /^\d+$/
         
     | 
| 
       50 
     | 
    
         
            -
                                   "+" + "N" * insertion.to_i 
         
     | 
| 
       51 
     | 
    
         
            -
                                 when insertion =~ /^[NACTG]+$/i
         
     | 
| 
       52 
     | 
    
         
            -
                                   "+" + insertion
         
     | 
| 
       53 
     | 
    
         
            -
                                 else
         
     | 
| 
       54 
     | 
    
         
            -
                                   Log.debug "Unknown insertion: #{insertion }"
         
     | 
| 
       55 
     | 
    
         
            -
                                   insertion
         
     | 
| 
       56 
     | 
    
         
            -
                                 end
         
     | 
| 
       57 
     | 
    
         
            -
                               else
         
     | 
| 
       58 
     | 
    
         
            -
                                 Log.debug "Unknown change: #{cds}"
         
     | 
| 
       59 
     | 
    
         
            -
                                 "?(" << cds << ")"
         
     | 
| 
       60 
     | 
    
         
            -
                               end
         
     | 
| 
       61 
     | 
    
         
            -
                      position + ":" + change
         
     | 
| 
       62 
     | 
    
         
            -
                    end
         
     | 
| 
       63 
     | 
    
         
            -
                  end
         
     | 
| 
       64 
     | 
    
         
            -
                end
         
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
                tsv.to_s.gsub(/(\d)-(\d)/,'\1:\2')
         
     | 
| 
       67 
     | 
    
         
            -
              end
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
              # Produces the "Hsa/may2009" version of the COSMIC mutations file: every
              # value of the "Genomic Mutation" column of COSMIC.mutations is replaced by
              # its Organism.liftOver translation from "Hsa/jun2011".
              #
              # filename - path of the file to write. It is removed again when anything
              #            goes wrong, so no partial file is left behind.
              #
              # The block returns nil; the content is written directly to +filename+.
              COSMIC.claim COSMIC.mutations_hg18, :proc do |filename|
                require 'rbbt/sources/organism'
                file = COSMIC.mutations.open
                begin
                  # Advance to the header line. EOF must end the search too: nil never
                  # matches the pattern, so looping on `!~` alone would spin forever on
                  # a file without the header.
                  line = file.gets
                  line = file.gets until line.nil? || line =~ /Genomic Mutation/
                  raise "No 'Genomic Mutation' header line found in COSMIC mutations" if line.nil?

                  # Drop the first character (the leading '#' of the header) and the trailing newline
                  fields = line[1..-2].split("\t")
                  mutation_pos = fields.index "Genomic Mutation"
                  raise "No 'Genomic Mutation' column found in COSMIC mutations header" if mutation_pos.nil?

                  # Unique values of the mutation column (cut is 1-based), read from a
                  # second stream over the same file; only values containing ':' are kept
                  mutations = CMD.cmd("grep -v '^#'|cut -f #{mutation_pos + 1}|sort -u", :in => COSMIC.mutations.open).read.split("\n").select{|m| m.include? ":" }

                  translations = Misc.process_to_hash(mutations){|list| Organism.liftOver(list, "Hsa/jun2011", "Hsa/may2009")}

                  File.open(filename, 'w') do |f|
                    f.puts "#: :type=:list#:namespace=Hsa/may2009"
                    f.puts "#" + fields * "\t"
                    # +file+ is positioned right after the header line at this point
                    while line = file.gets do
                      next if line[0] == "#"[0]
                      line.strip!
                      parts = line.split("\t")
                      # Values that were not sent to liftOver (no ':') have no entry and become empty
                      parts[mutation_pos] = translations[parts[mutation_pos]]
                      f.puts parts * "\t"
                    end
                  end
                rescue Exception
                  # Clean up on any failure (interrupts included) and re-raise it untouched
                  FileUtils.rm filename if File.exist? filename
                  raise
                ensure
                  file.close
                end

                nil
              end
         
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
              # Builds, through Persist.persist, a FixWidthTable of the COSMIC entries
              # found on one chromosome.
              #
              # organism   - organism code, turned into a genome build by Organism.hg_build
              # chromosome - chromosome name as it appears before the ':' in the mutation
              #              field. With the default nil no line passes the chromosome
              #              check, so the table ends up empty.
              #
              # Returns the value of the Persist.persist call.
              def self.rsid_index(organism, chromosome = nil)
                build = Organism.hg_build(organism)

                tag = [build, chromosome] * ":"
                Persist.persist("StaticPosIndex for COSMIC [#{ tag }]", :fwt, :persist => true) do
                  value_size = 0
                  file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
                  chr_positions = []
                  begin
                    # grep pre-filters lines holding "<TAB><chromosome>:" to keep the Ruby loop short
                    Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
                      next if line[0] == "#"[0]
                      # First field is the identifier; the field at index 25 is read as the mutation
                      rsid, mutation = line.split("\t").values_at 0, 25
                      next if mutation.nil? or mutation.empty?
                      chr, pos = mutation.split(":")
                      next if chr != chromosome or pos.nil? or pos.empty?
                      chr_positions << [rsid, pos.to_i]
                      # The table needs the width of the longest value
                      value_size = rsid.length if rsid.length > value_size
                    end
                  rescue => e
                    # Still best effort (whatever was collected is indexed), but the
                    # failure is now reported instead of being swallowed silently.
                    # NOTE(review): presumably this covers grep exiting non-zero when
                    # nothing matches -- confirm against CMD.cmd.
                    Log.debug "Could not read COSMIC positions for [#{ tag }]: #{e.message}"
                  end
                  fwt = FixWidthTable.new :memory, value_size
                  fwt.add_point(chr_positions)
                  fwt
                end
              end
         
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
              # Returns the TSV with the "Genomic Mutation" field of the COSMIC
              # mutations file for the genome build of +organism+ ("mutations" for hg19,
              # "mutations_hg18" otherwise). Loaded TSVs are memoized per build in
              # @mutation_index.
              #
              # organism - organism code, turned into a genome build by Organism.hg_build
              def self.mutation_index(organism)
                build = Organism.hg_build(organism)
                file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
                @mutation_index ||= {}
                # :persist used to be listed twice in this literal (Ruby warns about duplicated keys)
                @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single
              end
         
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
             
     | 
| 
       139 
     | 
    
         
            -
            end
         
     | 
| 
       140 
     | 
    
         
            -
             
     | 
| 
       141 
     | 
    
         
            -
            # COSMIC-backed properties for Gene entities. They are only declared when
            # the Entity framework is loaded and Gene is already set up as an Entity.
            if defined?(Entity) and defined?(Gene) and Entity === Gene
              module Gene
                # Entries of the COSMIC position index that fall in the gene's chromosomal range
                property :COSMIC_rsids => :single2array do
                  position_index = COSMIC.rsid_index(organism, chromosome)
                  position_index[self.chr_range]
                end

                # Genomic mutations recorded in COSMIC for the identifiers above
                property :COSMIC_mutations => :single2array do
                  mutation_index = COSMIC.mutation_index(organism)
                  found = mutation_index.values_at(*self.COSMIC_rsids).uniq
                  GenomicMutation.setup(found, "COSMIC mutations over #{self.name || self}", organism, false)
                end
              end
            end
         
     | 
    
        data/lib/rbbt/sources/dbSNP.rb
    DELETED
    
    | 
         @@ -1,194 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rbbt'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'rbbt/util/open'
         
     | 
| 
       3 
     | 
    
         
            -
            require 'rbbt/resource'
         
     | 
| 
       4 
     | 
    
         
            -
            require 'net/ftp'
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            # Resource module for dbSNP data kept under "share/databases/dbSNP".
            #
            # Claimed files:
            #   mutations_ncbi - RS ID => genomic mutations, from the NCBI VCF at URL
            #   rsids          - RS ID => GMAF, G5, G5A and dbSNP build, from the Broad bundle VCF
            #   mutations      - RS ID => genomic mutations, from the Broad bundle VCF
            #   mutations_hg18 - +mutations+ translated with Organism.liftOver to "Hsa/may2009"
            module DbSNP
              extend Resource
              self.subdir = "share/databases/dbSNP"

              URL = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606/VCF/common_all.vcf.gz"

              # Internal helper shared by the +rsids+ and +mutations+ claims: downloads
              # 'dbsnp_137.hg19.vcf.gz' from the Broad Institute FTP bundle into a
              # temporary file and yields a stream opened on it.
              #
              # The FTP connection is closed as soon as the download ends, and the
              # stream and the temporary file are released when the block finishes,
              # whether it succeeds or raises.
              #
              # Returns the value of the block.
              def self.with_broad_vcf
                tmpfile = TmpFile.tmp_file + '.gz'
                file = nil
                begin
                  ftp = Net::FTP.new('ftp.broadinstitute.org')
                  begin
                    ftp.passive = true
                    ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
                    ftp.chdir('/bundle/2.3/hg19')
                    ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
                  ensure
                    ftp.close
                  end

                  file = Open.open(tmpfile, :nocache => true)
                  yield file
                ensure
                  file.close if file
                  FileUtils.rm tmpfile if File.exist? tmpfile
                end
              end

              # Builds a flat TSV (RS ID => genomic mutations) from the NCBI VCF at URL
              # and returns it as text.
              DbSNP.claim DbSNP.mutations_ncbi, :proc do
                tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation"], :type => :flat)
                tsv.namespace = "Hsa/may2012"

                file = Open.open(URL, :nocache => true)
                begin
                  while line = file.gets do
                    next if line[0] == "#"[0]
                    chr, position, id, ref, alt = line.split "\t"
                    next if alt.nil? # line with fewer than five fields

                    # One mutation per alternative allele. Each allele is handled on its
                    # own; working on the whole +alt+ string put the comma-joined list in
                    # every mutation of a multi-allelic record.
                    mutations = alt.split(",").collect do |a|
                      # An allele starting with the first reference base gets that base replaced by '+'
                      a = "+" + a[1..-1] if a[0] == ref[0]
                      [chr, position, a] * ":"
                    end

                    tsv[id] = mutations
                  end
                ensure
                  file.close
                end

                tsv.to_s
              end

              # Writes to +filename+ a list TSV with, for each record of the Broad
              # bundle VCF, its RS ID plus the GMAF, G5, G5A and dbSNPBuildID values
              # found in the INFO field. The block returns nil.
              DbSNP.claim DbSNP.rsids, :proc do |filename|
                begin
                  DbSNP.with_broad_vcf do |file|
                    File.open(filename, 'w') do |f|
                      f.puts "#: :type=:list#:namespace=Hsa/may2012"
                      f.puts "#" + ["RS ID", "GMAF", "G5", "G5A", "dbSNP Build ID"] * "\t"
                      while line = file.gets do
                        next if line[0] == "#"[0]

                        # Third field is the identifier and eighth the INFO field
                        id, info = line.split("\t").values_at 2, 7

                        g5 = g5a = dbsnp_build_id = gmaf = nil

                        gmaf = $1 if info =~ /GMAF=([0-9.]+)/
                        g5 = true if info =~ /\bG5\b/
                        g5a = true if info =~ /\bG5A\b/
                        dbsnp_build_id = $1 if info =~ /dbSNPBuildID=(\d+)/

                        # Absent values are nil and show up as empty fields
                        f.puts [id, gmaf, g5, g5a, dbsnp_build_id] * "\t"
                      end
                    end
                  end
                rescue Exception
                  # Do not leave a partial file behind; the failure is re-raised untouched
                  FileUtils.rm filename if File.exist? filename
                  raise
                end

                nil
              end

              # Writes to +filename+ a flat TSV with, for each record of the Broad
              # bundle VCF, its RS ID followed by one "chr:position:change" entry per
              # mutation returned by Misc.correct_vcf_mutation. The block returns nil.
              DbSNP.claim DbSNP.mutations, :proc do |filename|
                begin
                  DbSNP.with_broad_vcf do |file|
                    File.open(filename, 'w') do |f|
                      f.puts "#: :type=:flat#:namespace=Hsa/may2012"
                      f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
                      while line = file.gets do
                        next if line[0] == "#"[0]

                        chr, position, id, ref, muts = line.split "\t"

                        chr.sub!('chr', '')

                        position, muts = Misc.correct_vcf_mutation(position.to_i, ref, muts)

                        mutations = muts.collect{|mut| [chr, position, mut] * ":" }

                        f.puts(([id] + mutations) * "\t")
                      end
                    end
                  end
                rescue Exception
                  # Do not leave a partial file behind; the failure is re-raised untouched
                  FileUtils.rm filename if File.exist? filename
                  raise
                end

                nil
              end

              # Writes to +filename+ the content of DbSNP.mutations with every mutation
              # replaced by its Organism.liftOver translation to "Hsa/may2009". The
              # block returns nil.
              DbSNP.claim DbSNP.mutations_hg18, :proc do |filename|
                require 'rbbt/sources/organism'

                # Collect the mutations of every value column: DbSNP.mutations writes one
                # tab-separated column per mutation and all of them are translated below
                mutations = CMD.cmd("grep -v '^#'|cut -f 2-|sort -u", :in => DbSNP.mutations.open).read.split("\n").collect{|l| l.split(/[\t|]/)}.flatten

                translations = Misc.process_to_hash(mutations){|list| Organism.liftOver(list, "Hsa/jun2011", "Hsa/may2009")}

                file = nil
                begin
                  file = Open.open(DbSNP.mutations.find, :nocache => true)
                  File.open(filename, 'w') do |f|
                    f.puts "#: :type=:flat#:namespace=Hsa/may2009"
                    f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
                    while line = file.gets do
                      next if line[0] == "#"[0]
                      # Without the chomp the last field keeps its newline and is never
                      # found in +translations+
                      parts = line.chomp.split("\t")
                      next if parts.empty?
                      parts[1..-1] = parts[1..-1].collect{|p| translations[p]} * "|"
                      f.puts parts * "\t"
                    end
                  end
                rescue Exception
                  # Do not leave a partial file behind; the failure is re-raised untouched
                  FileUtils.rm filename if File.exist? filename
                  raise
                ensure
                  # +file+ is still nil when opening it is what failed
                  file.close if file
                end

                nil
              end

              # Builds, through Persist.persist, a FixWidthTable of the dbSNP entries
              # found on one chromosome.
              #
              # organism   - organism code, turned into a genome build by Organism.hg_build
              # chromosome - chromosome name as it appears before the ':' of a mutation.
              #              With the default nil no line passes the chromosome check.
              #
              # Returns the value of the Persist.persist call.
              def self.rsid_index(organism, chromosome = nil)
                build = Organism.hg_build(organism)

                tag = [build, chromosome] * ":"
                Persist.persist("StaticPosIndex for dbSNP [#{ tag }]", :fwt, :persist => true) do
                  value_size = 0
                  file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
                  chr_positions = []
                  # grep pre-filters lines holding "<TAB><chromosome>:" to keep the Ruby loop short
                  Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
                    next if line[0] == "#"[0]
                    # Only the first mutation of the line is used for the position
                    rsid, mutation = line.split("\t")
                    next if mutation.nil? or mutation.empty?
                    chr, pos = mutation.split(":")
                    next if chr != chromosome or pos.nil? or pos.empty?
                    chr_positions << [rsid, pos.to_i]
                    # The table needs the width of the longest value
                    value_size = rsid.length if rsid.length > value_size
                  end
                  fwt = FixWidthTable.new :memory, value_size
                  fwt.add_point(chr_positions)
                  fwt
                end
              end

              # Returns the TSV with the "Genomic Mutation" field of the dbSNP mutations
              # file for the genome build of +organism+ ("mutations" for hg19,
              # "mutations_hg18" otherwise). Loaded TSVs are memoized per build in
              # @mutation_index.
              def self.mutation_index(organism)
                build = Organism.hg_build(organism)
                file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
                @mutation_index ||= {}
                # :persist used to be listed twice in this literal (Ruby warns about duplicated keys)
                @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single
              end

            end
         
     | 
| 
       171 
     | 
    
         
            -
             
     | 
| 
       172 
     | 
    
         
            -
            # dbSNP-backed entity properties. They are only declared when the Entity
            # framework is loaded and the target modules are already set up as Entities.
            if defined? Entity
              if defined?(Gene) and Entity === Gene
                module Gene
                  # Entries of the dbSNP position index that fall in the gene's chromosomal range
                  property :dbSNP_rsids => :single2array do
                    DbSNP.rsid_index(organism, chromosome)[self.chr_range]
                  end

                  # Genomic mutations recorded in dbSNP for the identifiers above
                  property :dbSNP_mutations => :single2array do
                    GenomicMutation.setup(DbSNP.mutation_index(organism).values_at(*self.dbSNP_rsids).compact.flatten.uniq, "dbSNP mutations over #{self.name || self}", organism, true)
                  end
                end
              end

              if defined?(GenomicMutation) and Entity === GenomicMutation
                module GenomicMutation
                  # RS ID of each mutation, looked up in the DbSNP.mutations file keyed by
                  # "Genomic Mutation".
                  property :dbSNP => :array2single do
                    # The resource module is DbSNP; a lowercase `dbSNP` here presumably
                    # resolves to this property itself and recurses instead of reaching
                    # the resource.
                    DbSNP.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["RS ID"], :type => :single).values_at(*self)
                  end
                end

              end
            end
         
     | 
| 
       194 
     | 
    
         
            -
             
     | 
| 
         @@ -1,109 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rbbt'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'rbbt/util/open'
         
     | 
| 
       3 
     | 
    
         
            -
            require 'rbbt/resource'
         
     | 
| 
       4 
     | 
    
         
            -
            require 'rbbt/entity/gene'
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            module Genomes1000
         
     | 
| 
       7 
     | 
    
         
            -
              extend Resource
         
     | 
| 
       8 
     | 
    
         
            -
              self.subdir = "share/databases/genomes_1000"
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
              RELEASE_URL = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20110521/ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites.vcf.gz"
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
              Genomes1000.claim Genomes1000.mutations, :proc do |filename|
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                begin
         
     | 
| 
       15 
     | 
    
         
            -
                  Open.write(filename) do |file|
         
     | 
| 
       16 
     | 
    
         
            -
                    file.puts "#: :type=:single#:namespace=Hsa"
         
     | 
| 
       17 
     | 
    
         
            -
                    file.puts "#Variant ID\tGenomic Mutation"
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                    Open.read(RELEASE_URL) do |line|
         
     | 
| 
       20 
     | 
    
         
            -
                      next if line[0] == "#"[0]
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                      chromosome, position, id, references, alternative, quality, filter, info = line.split("\t")
         
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                      file.puts [id, [chromosome, position, alternative] * ":"] * "\t"
         
     | 
| 
       25 
     | 
    
         
            -
                    end
         
     | 
| 
       26 
     | 
    
         
            -
                  end
         
     | 
| 
       27 
     | 
    
         
            -
                rescue
         
     | 
| 
       28 
     | 
    
         
            -
                  FileUtils.rm filename if File.exists? filename
         
     | 
| 
       29 
     | 
    
         
            -
                  raise $!
         
     | 
| 
       30 
     | 
    
         
            -
                end
         
     | 
| 
       31 
     | 
    
         
            -
                nil
         
     | 
| 
       32 
     | 
    
         
            -
              end
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
              Genomes1000.claim Genomes1000.mutations_hg18, :proc do
         
     | 
| 
       36 
     | 
    
         
            -
                require 'rbbt/sources/organism'
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
                hg19_tsv = Genomes1000.mutations.tsv :unnamed => true
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                mutations = hg19_tsv.values
         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
                translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                tsv = hg19_tsv.process "Genomic Mutation" do |mutation|
         
     | 
| 
       45 
     | 
    
         
            -
                  translations[mutation]
         
     | 
| 
       46 
     | 
    
         
            -
                end
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                tsv.namespace = "Hsa/may2009"
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
                tsv.to_s
         
     | 
| 
       51 
     | 
    
         
            -
              end
         
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
              def self.rsid_index(organism, chromosome = nil)
         
     | 
| 
       54 
     | 
    
         
            -
                build = Organism.hg_build(organism)
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                tag = [build, chromosome] * ":"
         
     | 
| 
       57 
     | 
    
         
            -
                Persist.persist("StaticPosIndex for Genomes1000 [#{ tag }]", :fwt, :persist => true) do
         
     | 
| 
       58 
     | 
    
         
            -
                  value_size = 0
         
     | 
| 
       59 
     | 
    
         
            -
                  file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
         
     | 
| 
       60 
     | 
    
         
            -
                  chr_positions = []
         
     | 
| 
       61 
     | 
    
         
            -
                  Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
         
     | 
| 
       62 
     | 
    
         
            -
                    next if line[0] == "#"[0]
         
     | 
| 
       63 
     | 
    
         
            -
                    rsid, mutation = line.split("\t")
         
     | 
| 
       64 
     | 
    
         
            -
                    next if mutation.nil? or mutation.empty?
         
     | 
| 
       65 
     | 
    
         
            -
                    chr, pos = mutation.split(":")
         
     | 
| 
       66 
     | 
    
         
            -
                    next if chr != chromosome or pos.nil? or pos.empty?
         
     | 
| 
       67 
     | 
    
         
            -
                    chr_positions << [rsid, pos.to_i]
         
     | 
| 
       68 
     | 
    
         
            -
                    value_size = rsid.length if rsid.length > value_size
         
     | 
| 
       69 
     | 
    
         
            -
                  end
         
     | 
| 
       70 
     | 
    
         
            -
                  fwt = FixWidthTable.new :memory, value_size
         
     | 
| 
       71 
     | 
    
         
            -
                  fwt.add_point(chr_positions)
         
     | 
| 
       72 
     | 
    
         
            -
                  fwt
         
     | 
| 
       73 
     | 
    
         
            -
                end
         
     | 
| 
       74 
     | 
    
         
            -
              end
         
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
              def self.mutation_index(organism)
         
     | 
| 
       77 
     | 
    
         
            -
                build = Organism.hg_build(organism)
         
     | 
| 
       78 
     | 
    
         
            -
                file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
         
     | 
| 
       79 
     | 
    
         
            -
                @mutation_index ||= {}
         
     | 
| 
       80 
     | 
    
         
            -
                @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
         
     | 
| 
       81 
     | 
    
         
            -
              end
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
            end
         
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
            if defined? Entity
         
     | 
| 
       88 
     | 
    
         
            -
              if defined? Gene and Entity === Gene
         
     | 
| 
       89 
     | 
    
         
            -
                module Gene
         
     | 
| 
       90 
     | 
    
         
            -
                  property :genomes_1000_rsids => :single2array do
         
     | 
| 
       91 
     | 
    
         
            -
                    Genomes1000.rsid_index(organism, chromosome)[self.chr_range]
         
     | 
| 
       92 
     | 
    
         
            -
                  end
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                  property :genomes_1000_mutations => :single2array do
         
     | 
| 
       95 
     | 
    
         
            -
                    GenomicMutation.setup(Genomes1000.mutation_index(organism).values_at(*self.genomes_1000_rsids).uniq, "1000 Genomes mutations over #{self.name || self}", organism, true)
         
     | 
| 
       96 
     | 
    
         
            -
                  end
         
     | 
| 
       97 
     | 
    
         
            -
                end
         
     | 
| 
       98 
     | 
    
         
            -
              end
         
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
              if defined? GenomicMutation and Entity === GenomicMutation
         
     | 
| 
       101 
     | 
    
         
            -
                module GenomicMutation
         
     | 
| 
       102 
     | 
    
         
            -
                  property :genomes_1000 => :array2single do
         
     | 
| 
       103 
     | 
    
         
            -
                    Genomes1000.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["Variant ID"], :type => :single).values_at *self
         
     | 
| 
       104 
     | 
    
         
            -
                  end
         
     | 
| 
       105 
     | 
    
         
            -
                end
         
     | 
| 
       106 
     | 
    
         
            -
              end
         
     | 
| 
       107 
     | 
    
         
            -
            end
         
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
             
     |