rbbt-sources 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
    
        data/lib/rbbt/sources/biomart.rb
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            require 'rbbt'
         | 
| 1 2 | 
             
            require 'rbbt/util/tsv'
         | 
| 2 3 | 
             
            require 'rbbt/util/log'
         | 
| 3 4 | 
             
            require 'cgi'
         | 
| @@ -13,6 +14,8 @@ module BioMart | |
| 13 14 |  | 
| 14 15 | 
             
              BIOMART_URL = 'http://biomart.org/biomart/martservice?query='
         | 
| 15 16 |  | 
| 17 | 
            +
              MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.yaml
         | 
| 18 | 
            +
             | 
| 16 19 | 
             
              private
         | 
| 17 20 |  | 
| 18 21 | 
             
              @@biomart_query_xml = <<-EOT
         | 
| @@ -28,12 +31,14 @@ module BioMart | |
| 28 31 | 
             
              EOT
         | 
| 29 32 |  | 
| 30 33 | 
             
              # Switch BioMart queries to an Ensembl archive release.
              #
              # date - String naming the archive release; it is spliced into the
              #        host name in place of the leading "biomart." of BIOMART_URL.
              #
              # Records the date in @archive and the rewritten service URL in
              # @archive_url, then logs the URL at debug level.
              def self.set_archive(date)
                @archive = date
                archive_host = 'http://' + date + '.archive.ensembl.'
                @archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, archive_host)
                Log.debug "Using Archive URL #{ @archive_url }"
              end
         | 
| 34 38 |  | 
| 35 39 | 
             
              # Undo set_archive: log the switch back to BIOMART_URL and clear
              # both the remembered archive date and the archive URL.
              def self.unset_archive
                Log.debug "Restoring current version URL #{BIOMART_URL}"
                @archive = @archive_url = nil
              end
         | 
| 39 44 |  | 
| @@ -61,15 +66,16 @@ module BioMart | |
| 61 66 | 
             
                result_file = TmpFile.tmp_file
         | 
| 62 67 | 
             
                Open.write(result_file, response)
         | 
| 63 68 |  | 
| 69 | 
            +
                new_datafile = TmpFile.tmp_file
         | 
| 64 70 | 
             
                if data.nil?
         | 
| 65 | 
            -
                   | 
| 71 | 
            +
                  TSV.merge_rows Open.open(result_file), new_datafile
         | 
| 72 | 
            +
                  data = new_datafile
         | 
| 66 73 | 
             
                else
         | 
| 67 | 
            -
                  new_datafile = TmpFile.tmp_file
         | 
| 68 74 | 
             
                  TSV.paste_merge data, result_file, new_datafile
         | 
| 69 75 | 
             
                  FileUtils.rm data
         | 
| 70 76 | 
             
                  data = new_datafile
         | 
| 71 | 
            -
                  FileUtils.rm result_file
         | 
| 72 77 | 
             
                end
         | 
| 78 | 
            +
                FileUtils.rm result_file
         | 
| 73 79 |  | 
| 74 80 | 
             
                data
         | 
| 75 81 | 
             
              end
         | 
| @@ -91,7 +97,8 @@ module BioMart | |
| 91 97 | 
             
              # cause an error if the BioMart WS does not allow filtering with that
         | 
| 92 98 | 
             
              # attribute.
         | 
| 93 99 | 
             
              def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
         | 
| 94 | 
            -
                open_options = Misc.add_defaults open_options, :nocache => false
         | 
| 100 | 
            +
                open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil
         | 
| 101 | 
            +
                filename, field_names = Misc.process_options open_options, :filename, :field_names
         | 
| 95 102 | 
             
                attrs   ||= []
         | 
| 96 103 |  | 
| 97 104 | 
             
                open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
         | 
| @@ -118,22 +125,41 @@ module BioMart | |
| 118 125 | 
             
                  data = get(database, main, chunk, filters, data, open_options)
         | 
| 119 126 | 
             
                }
         | 
| 120 127 |  | 
| 121 | 
            -
                 | 
| 122 | 
            -
                 | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 127 | 
            -
                 | 
| 128 | 
            +
                open_options[:filename] ||= "BioMart: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}" 
         | 
| 129 | 
            +
                if filename.nil?
         | 
| 130 | 
            +
                  results = TSV.new data, open_options
         | 
| 131 | 
            +
                  results.key_field = main
         | 
| 132 | 
            +
                  results.fields = attrs
         | 
| 133 | 
            +
                  results
         | 
| 134 | 
            +
                else
         | 
| 135 | 
            +
                  Open.write(filename) do |f|
         | 
| 136 | 
            +
                    f.puts "#: " << Misc.hash2string(TSV::EXTRA_ACCESSORS.collect{|key| [key, open_options[key]]})
         | 
| 137 | 
            +
                    if field_names.nil?
         | 
| 138 | 
            +
                      f.puts "#" << [main, attrs].flatten * "\t"
         | 
| 139 | 
            +
                    else
         | 
| 140 | 
            +
                      f.puts "#" << field_names * "\t"
         | 
| 141 | 
            +
                    end
         | 
| 142 | 
            +
                    f.write Open.read(data)
         | 
| 143 | 
            +
                  end
         | 
| 144 | 
            +
                  FileUtils.rm data
         | 
| 145 | 
            +
                  filename
         | 
| 146 | 
            +
                end
         | 
| 128 147 | 
             
              end
         | 
| 129 148 |  | 
| 130 149 | 
             
              # Run a BioMart query using labelled attributes.
              #
              # database     - BioMart dataset name, passed through to +query+.
              # main         - pair whose first element is the label used as key
              #                field and whose last element is the BioMart code.
              # attrs        - list of [label, code] pairs (nil is treated as an
              #                empty list, as +query+ does).
              # filters, data, open_options - passed through to +query+.
              #
              # When an archive is active (see set_archive), attributes whose code
              # is listed in MISSING_IN_ARCHIVE for that archive are dropped first.
              #
              # Returns what +query+ returns: with no :filename option, the result
              # object with its key_field/fields replaced by the labels; with a
              # :filename option, the labels are handed to +query+ as :field_names
              # and its return value is passed back unchanged.
              def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
                attrs ||= []

                if @archive_url
                  # An archive without an entry in the list has nothing to drop.
                  # NOTE(review): assumes MISSING_IN_ARCHIVE answers [] with nil
                  # for unknown archives, like a Hash loaded from YAML - confirm.
                  missing = MISSING_IN_ARCHIVE[@archive] || []
                  attrs = attrs.reject{|attr| missing.include? attr[1]}
                end

                codes  = attrs.collect{|attr| attr[1]}
                labels = attrs.collect{|attr| attr.first}

                if open_options[:filename].nil?
                  tsv = query(database, main.last, codes, filters, data, open_options)
                  tsv.key_field = main.first
                  tsv.fields    = labels
                  tsv
                else
                  query(database, main.last, codes, filters, data, open_options.merge(:field_names => [main.first, labels].flatten))
                end
              end
         | 
| 138 164 | 
             
            end
         | 
| 139 165 |  | 
| @@ -29,6 +29,7 @@ module Organism | |
| 29 29 | 
             
                options = Misc.add_defaults options, :persistence => true, :case_insensitive => true, :double => false
         | 
| 30 30 | 
             
                double = Misc.process_options options, :double
         | 
| 31 31 |  | 
| 32 | 
            +
             | 
| 32 33 | 
             
                options.merge! :target => target unless target.nil?
         | 
| 33 34 | 
             
                options.merge! :fields => fields unless fields.nil?
         | 
| 34 35 |  | 
| @@ -69,7 +70,7 @@ module Organism | |
| 69 70 | 
             
                }.first
         | 
| 70 71 | 
             
              end
         | 
| 71 72 |  | 
| 72 | 
            -
              ["Hsa", "Sce"].each do |organism|
         | 
| 73 | 
            +
              ["Hsa", "Rno", "Sce"].each do |organism|
         | 
| 73 74 | 
             
                rakefile = Rbbt["share/install/Organism/#{ organism }/Rakefile"]
         | 
| 74 75 | 
             
                rakefile.lib_dir = Resource.caller_lib_dir __FILE__
         | 
| 75 76 | 
             
                rakefile.pkgdir = 'phgx'
         | 
| @@ -4,6 +4,8 @@ require 'bio' | |
| 4 4 | 
             
            # Sequence analyses
         | 
| 5 5 | 
             
            module Organism
         | 
| 6 6 | 
             
              extend WorkFlow
         | 
| 7 | 
            +
              relative_to Rbbt, "share/organisms"
         | 
| 8 | 
            +
              self.jobdir = Rbbt.var.organism.find
         | 
| 7 9 |  | 
| 8 10 | 
             
              def self.coding_transcripts_for_exon(org, exon, exon_transcripts, transcript_info)
         | 
| 9 11 | 
             
                exon_transcripts ||= Organism.transcript_exons(org).tsv(:double, :key => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :merge => true, :persistence => true )
         | 
| @@ -201,16 +203,59 @@ module Organism | |
| 201 203 | 
             
                position_offsets
         | 
| 202 204 | 
             
              end
         | 
| 203 205 |  | 
| 204 | 
            -
              task_option : | 
| 206 | 
            +
              # Task: annotate genomic mutations with the genes found at each position.
              #
              # Inputs are the organism code and the mutations, either as a TSV or
              # as text that is parsed into a :list TSV. Keys are split on ":" to
              # obtain the positions handed to Organism.genes_at_genomic_positions.
              # The result is the same TSV with an added
              # "<organism>:Ensembl Gene ID" field.
              task_option :organism, "Organism", :string, "Hsa"
              task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
              task_dependencies nil
              task :genomic_mutations_to_genes => :tsv do |org,genomic_mutations|
                genomic_mutations = case
                                    when TSV === genomic_mutations
                                      genomic_mutations
                                    else
                                      TSV.new StringIO.new(genomic_mutations), :list
                                    end
                genomic_mutations.key_field = "Position"
                genomic_mutations.fields = ["Mutation"]

                position_keys = genomic_mutations.keys
                positions = position_keys.collect{|l| l.split(":")}

                step(:resources, "Load Resources")
                # Pair each key with its lookup result as-is. Splatting a fully
                # flattened list would misalign keys and values whenever a
                # per-position result is itself an array (or an empty one).
                # NOTE(review): presumably genes_at_genomic_positions returns one
                # entry per position, in the same order - confirm.
                genes = Organism.genes_at_genomic_positions(org, positions)
                genes_at_positions = Hash[position_keys.zip(genes)]

                # Only the part of the organism code before any "/" is used as prefix.
                genomic_mutations.add_field "#{org.sub(/\/.*/,'')}:Ensembl Gene ID" do |position, values|
                  genes_at_positions[position]
                end

                genomic_mutations
              end
         | 
| 230 | 
            +
             | 
| 231 | 
            +
             | 
| 232 | 
            +
              task_description <<-EOF
         | 
| 233 | 
            +
            Translates a collection of mutations in genomic coordinates into mutations in aminoacids for the
         | 
| 234 | 
            +
            protein products of transcripts including those positions.
         | 
| 235 | 
            +
              EOF
         | 
| 236 | 
            +
              task_option :organism, "Organism", :string, "Hsa"
         | 
| 237 | 
            +
              task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
         | 
| 238 | 
            +
              task_dependencies nil
         | 
| 239 | 
            +
              task :genomic_mutations_to_protein_mutations => :tsv do |org,genomic_mutations|
         | 
| 240 | 
            +
                genomic_mutations = case
         | 
| 241 | 
            +
                                    when TSV === genomic_mutations
         | 
| 242 | 
            +
                                      genomic_mutations
         | 
| 243 | 
            +
                                    else
         | 
| 244 | 
            +
                                      TSV.new StringIO.new(genomic_mutations), :list
         | 
| 245 | 
            +
                                    end
         | 
| 246 | 
            +
             | 
| 247 | 
            +
                genomic_mutations.key_field = "Position"
         | 
| 248 | 
            +
                genomic_mutations.fields = ["Mutation"]
         | 
| 249 | 
            +
             | 
| 207 250 | 
             
                positions = genomic_mutations.keys.collect{|l| l.split(":")}
         | 
| 208 251 |  | 
| 209 252 | 
             
                step(:prepare, "Prepare Results")
         | 
| 210 253 | 
             
                results = TSV.new({})
         | 
| 211 254 | 
             
                results.key_field = "Position"
         | 
| 212 | 
            -
                results.fields = ["Ensembl Transcript ID", "Mutation"]
         | 
| 255 | 
            +
                results.fields = ["#{org.sub(/\/.*/,'')}:Ensembl Transcript ID", "Protein Mutation"]
         | 
| 213 256 | 
             
                results.type = :double
         | 
| 257 | 
            +
                results.filename = path
         | 
| 258 | 
            +
             | 
| 214 259 |  | 
| 215 260 | 
             
                step(:resources, "Load Resources")
         | 
| 216 261 | 
             
                transcript_sequence = Organism.transcript_sequence(org).tsv(:single, :persistence => true)
         | 
| @@ -229,7 +274,6 @@ module Organism | |
| 229 274 |  | 
| 230 275 | 
             
                  transcripts.each do |transcript, offset_info|
         | 
| 231 276 | 
             
                    offset, strand = offset_info
         | 
| 232 | 
            -
                    ddd strand
         | 
| 233 277 | 
             
                    begin
         | 
| 234 278 | 
             
                      codon = Organism.codon_at_transcript_position(org, transcript, offset, transcript_sequence, transcript_5utr)
         | 
| 235 279 | 
             
                    rescue
         | 
| @@ -237,12 +281,9 @@ module Organism | |
| 237 281 | 
             
                      next
         | 
| 238 282 | 
             
                    end
         | 
| 239 283 |  | 
| 240 | 
            -
                    ddd codon
         | 
| 241 284 | 
             
                    if not codon.nil?
         | 
| 242 285 | 
             
                      alleles.each do |allele|
         | 
| 243 | 
            -
                        ddd allele
         | 
| 244 286 | 
             
                        allele = Misc::BASE2COMPLEMENT[allele] if strand == -1
         | 
| 245 | 
            -
                        ddd allele
         | 
| 246 287 | 
             
                        change = Organism.codon_change(allele, *codon.values_at(0,1))
         | 
| 247 288 | 
             
                        pos_code = position * ":"
         | 
| 248 289 | 
             
                        mutation = [change.first, codon.last + 1, change.last] * ""
         | 
| @@ -323,7 +364,7 @@ X	10085674	C	T | |
| 323 364 | 
             
              #positions =  positions.select ["10:98099540"]
         | 
| 324 365 |  | 
| 325 366 | 
             
              Organism.basedir = Rbbt.tmp.organism.sequence.jobs.find :user
         | 
| 326 | 
            -
              job =  Organism.job : | 
| 367 | 
            +
              job =  Organism.job :genomic_mutations_to_protein_mutations, "Metastasis", org, positions.slice("Tumor")
         | 
| 327 368 | 
             
              job.run
         | 
| 328 369 |  | 
| 329 370 | 
             
              while not job.done?
         | 
| @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            # Organism definition for Rattus norvegicus: sets the globals read by
            # the shared organism_helpers.rb rules, which are loaded at the end.
            rakefile_dir = File.dirname(__FILE__)

            $LOAD_PATH.unshift(File.join(rakefile_dir,'..', '..', '..', '..', 'lib'))
            require 'rbbt/sources/biomart'
            require 'rbbt/sources/entrez'
            require File.join(rakefile_dir, '../../lib/helpers')

            # NCBI taxonomy ids and names for this organism
            $taxs            = [10116]
            $scientific_name = "Rattus norvegicus"

            # BioMart dataset to query
            $biomart_db = 'rnorvegicus_gene_ensembl'

            # [field label, BioMart attribute code] pairs
            $biomart_lexicon = [
              ['Associated Gene Name',     "external_gene_id"],
              ['HGNC symbol',              "hgnc_symbol"],
              ['HGNC automatic gene name', "hgnc_automatic_gene_name"],
              ['HGNC curated gene name ',  "hgnc_curated_gene_name"],
            ]

            # [field label, BioMart attribute code] pairs
            $biomart_identifiers = [
              ['Associated Gene Name',        "external_gene_id"],
              ['Protein ID',                  "protein_id"],
              ['UniProt/SwissProt ID',        "uniprot_swissprot"],
              ['UniProt/SwissProt Accession', "uniprot_swissprot_accession"],
              ['RefSeq Protein ID',           "refseq_peptide"],
              ['RefSeq DNA ID',               "refseq_dna"],
              ['EMBL (Genbank) ID',           "embl"],
              ['RGD ID',                      "rgd"],
              ['RGD Symbol',                  "rgd_symbol"],

              # microarray probe identifiers
              ['Affy rae230a',        "affy_rae230a"],
              ['Affy rae230b',        "affy_rae230b"],
              ['Affy RaGene',         "affy_ragene_1_0_st_v1"],
              ['Affy rat230 2',       "affy_rat230_2"],
              ['Affy RaEx',           "affy_raex_1_0_st_v1"],
              ['Affy rg u34a',        "affy_rg_u34a"],
              ['Affy rg u34b',        "affy_rg_u34b"],
              ['Affy rg u34c',        "affy_rg_u34c"],
              ['Affy rn u34',         "affy_rn_u34"],
              ['Affy rt u34',         "affy_rt_u34"],
              ['Agilent WholeGenome', "agilent_wholegenome"],
              ['Codelink ID ',        "codelink"],
            ]

            # The namespace is the name of the directory holding this Rakefile
            $namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
            load File.join(rakefile_dir, '../organism_helpers.rb')
         | 
| @@ -103,7 +103,7 @@ file 'scientific_name' do |t| | |
| 103 103 | 
             
            end
         | 
| 104 104 |  | 
| 105 105 | 
             
            file 'identifiers' do |t|
         | 
| 106 | 
            -
              identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [])
         | 
| 106 | 
            +
              identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
         | 
| 107 107 | 
             
              $biomart_identifiers.each do |name, key, prefix|
         | 
| 108 108 | 
             
                if prefix
         | 
| 109 109 | 
             
                  identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
         | 
| @@ -114,20 +114,20 @@ file 'identifiers' do |t| | |
| 114 114 | 
             
            end
         | 
| 115 115 |  | 
| 116 116 | 
             
            # Build 'gene_transcripts': query BioMart for the gene -> transcript
            # attributes as a :flat table and print it into the target file.
            file 'gene_transcripts' do |task|
              biomart_options  = { :type => :flat, :namespace => $namespace }
              gene_transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, biomart_options)

              File.open(task.name, 'w') { |out| out.puts gene_transcripts }
            end
         | 
| 121 121 |  | 
| 122 122 | 
             
            # Build 'transcripts' (needs 'gene_positions'): query BioMart for the
            # transcript attributes as a :list table, attach the "Chromosome Name"
            # field from the gene_positions file, and print the result.
            file 'transcripts' => 'gene_positions' do |task|
              biomart_options = { :type => :list, :namespace => $namespace }
              transcript_info = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, biomart_options)
              gene_positions  = TSV.new('gene_positions')
              transcript_info.attach gene_positions, "Chromosome Name"

              File.open(task.name, 'w') { |out| out.puts transcript_info }
            end
         | 
| 128 128 |  | 
| 129 129 | 
             
            file 'transcript_3utr' do |t|
         | 
| 130 | 
            -
              utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, : | 
| 130 | 
            +
              utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
         | 
| 131 131 |  | 
| 132 132 | 
             
              File.open(t.name, 'w') do |f| 
         | 
| 133 133 | 
             
                f.puts "#: :type=:single#cast=to_i"
         | 
| @@ -142,7 +142,7 @@ end | |
| 142 142 |  | 
| 143 143 |  | 
| 144 144 | 
             
            file 'transcript_5utr' do |t|
         | 
| 145 | 
            -
              utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, : | 
| 145 | 
            +
              utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
         | 
| 146 146 |  | 
| 147 147 | 
             
              File.open(t.name, 'w') do |f| 
         | 
| 148 148 | 
             
                f.puts "#: :type=:single#cast=to_i"
         | 
| @@ -162,7 +162,7 @@ file 'gene_positions' do |t| | |
| 162 162 | 
             
            end
         | 
| 163 163 |  | 
| 164 164 | 
             
            file 'gene_sequence' do |t|
         | 
| 165 | 
            -
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, : | 
| 165 | 
            +
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
         | 
| 166 166 |  | 
| 167 167 | 
             
              File.open(t.name, 'w') do |f| 
         | 
| 168 168 | 
             
                f.puts "#: :type=:single"
         | 
| @@ -179,7 +179,7 @@ file 'gene_sequence' do |t| | |
| 179 179 | 
             
            end
         | 
| 180 180 |  | 
| 181 181 | 
             
            file 'protein_sequence' do |t|
         | 
| 182 | 
            -
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, : | 
| 182 | 
            +
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace)
         | 
| 183 183 |  | 
| 184 184 | 
             
              File.open(t.name, 'w') do |f| 
         | 
| 185 185 | 
             
                f.puts "#: :type=:single"
         | 
| @@ -197,20 +197,20 @@ file 'protein_sequence' do |t| | |
| 197 197 | 
             
            end
         | 
| 198 198 |  | 
| 199 199 | 
             
            # Build 'exons' (needs 'gene_positions'): query BioMart for the exon
            # attributes as an unmerged :list table, attach the "Chromosome Name"
            # field from the gene_positions file, and print the result.
            file 'exons' => 'gene_positions' do |task|
              biomart_options = { :merge => false, :type => :list, :namespace => $namespace }
              exon_info       = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, biomart_options)
              gene_positions  = TSV.new('gene_positions')
              exon_info.attach gene_positions, "Chromosome Name"

              File.open(task.name, 'w') { |out| out.puts exon_info }
            end
         | 
| 205 205 |  | 
| 206 206 | 
             
            # Build 'transcript_exons': query BioMart for the transcript -> exon
            # attributes, keeping empty values, and print the table into the
            # target file.
            file 'transcript_exons' do |task|
              biomart_options  = { :keep_empty => true, :namespace => $namespace }
              transcript_exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, biomart_options)

              File.open(task.name, 'w') { |out| out.puts transcript_exons }
            end
         | 
| 211 211 |  | 
| 212 212 | 
             
            file 'transcript_sequence' do |t|
         | 
| 213 | 
            -
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, : | 
| 213 | 
            +
              sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
         | 
| 214 214 |  | 
| 215 215 | 
             
              File.open(t.name, 'w') do |f| 
         | 
| 216 216 | 
             
                f.puts "#: :type=:single"
         | 
| @@ -232,28 +232,28 @@ $biomart_variation_filter = ["snptype_filters", "COMPLEX_INDEL,COMPLEX_INDEL&NMD | |
| 232 232 | 
             
            $biomart_variation_filter = ["snptype_filters", 'COMPLEX_INDEL&NMD_TRANSCRIPT']
         | 
| 233 233 |  | 
| 234 234 | 
             
            # Build 'germline_variations'. The :filename option makes BioMart.tsv
            # write the table to the target path itself, so its return value is
            # not needed and nothing else has to be written here.
            file 'germline_variations' do |t|
              BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
            end
         | 
| 238 237 |  | 
| 239 238 | 
             
            # Build 'germline_variation_positions'. The :filename option makes
            # BioMart.tsv write the table to the target path and hand back that
            # path. The file must not be reopened and rewritten afterwards: doing
            # so would replace the downloaded table with the path string.
            file 'germline_variation_positions' do |t|
              BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
            end
         | 
| 243 242 |  | 
| 244 243 | 
             
            # Build 'somatic_variations'. The :filename option makes BioMart.tsv
            # write the table to the target path and hand back that path. The
            # file must not be reopened and rewritten afterwards: doing so would
            # replace the downloaded table with the path string.
            file 'somatic_variations' do |t|
              BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
            end
         | 
| 248 247 |  | 
| 249 248 | 
             
            # Build 'somatic_variation_positions'. The :filename option makes
            # BioMart.tsv write the table to the target path and hand back that
            # path. The file must not be reopened and rewritten afterwards: doing
            # so would replace the downloaded table with the path string.
            file 'somatic_variation_positions' do |t|
              BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
            end
         | 
| 253 252 |  | 
| 254 253 | 
             
            file 'gene_pmids' do |t|
         | 
| 255 254 | 
             
              tsv =  Entrez.entrez2pubmed($taxs)
         | 
| 256 | 
            -
              text = " | 
| 255 | 
            +
              text = "#: :namespace=#{$namespace}"
         | 
| 256 | 
            +
              text += "#Entrez Gene ID\tPMID"
         | 
| 257 257 | 
             
              tsv.each do |gene, pmids|
         | 
| 258 258 | 
             
                text << "\n" << gene << "\t" << pmids * "|"
         | 
| 259 259 | 
             
              end
         | 
| @@ -270,7 +270,8 @@ file 'exon_offsets' => %w(exons transcript_exons gene_transcripts transcripts tr | |
| 270 270 | 
             
              transcript_exons = TSV.new('transcript_exons', :double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"], :persistence => true )
         | 
| 271 271 |  | 
| 272 272 |  | 
| 273 | 
            -
              string = " | 
| 273 | 
            +
              # Options header line; it must end in a newline so the "#Ensembl Exon ID" field header appended next starts on its own line.
              string = "#: :namespace=#{$namespace}\n"
         | 
| 274 | 
            +
              string += "#Ensembl Exon ID\tEnsembl Transcript ID\tOffset\n"
         | 
| 274 275 | 
             
              exons.each do |exon, info|
         | 
| 275 276 | 
             
                gene, start, finish, strand, chr = info
         | 
| 276 277 |  | 
| @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            require File.dirname(__FILE__) + '/../../test_helper'
         | 
| 2 2 | 
             
            require 'rbbt/sources/biomart'
         | 
| 3 | 
            +
            require 'rbbt/util/tmpfile'
         | 
| 3 4 | 
             
            require 'test/unit'
         | 
| 4 5 |  | 
| 5 6 | 
             
            class TestBioMart < Test::Unit::TestCase
         | 
| @@ -20,16 +21,28 @@ class TestBioMart < Test::Unit::TestCase | |
| 20 21 |  | 
| 21 22 | 
             
              def test_query
         | 
| 22 23 | 
             
                data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
         | 
| 23 | 
            -
             | 
| 24 24 | 
             
                assert(data['852236']['external_gene_id'].include? 'YBL044W')
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                TmpFile.with_file do |f|
         | 
| 27 | 
            +
                  filename = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
         | 
| 28 | 
            +
                  data = TSV.new Open.open(filename)
         | 
| 29 | 
            +
                  assert(data['852236']['external_gene_id'].include? 'YBL044W')
         | 
| 30 | 
            +
                end
         | 
| 25 31 | 
             
              end
         | 
| 26 32 |  | 
| 27 33 | 
             
              def test_tsv
         | 
| 28 34 | 
             
                data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
         | 
| 29 | 
            -
             | 
| 30 35 | 
             
                assert(data['852236']['Protein ID'].include? 'CAA84864')
         | 
| 31 36 | 
             
                assert_equal 'Entrez Gene', data.key_field
         | 
| 32 37 | 
             
                assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                TmpFile.with_file do |f|
         | 
| 40 | 
            +
                  filename = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
         | 
| 41 | 
            +
                  # Explicit parentheses: without them Ruby passes :merge to Open.open, not to TSV.new.
                  # NOTE(review): assumes :merge is meant as a TSV parsing option (combine rows sharing a key) — confirm against TSV.new.
                  data = TSV.new(Open.open(filename), :merge => true)
         | 
| 42 | 
            +
                  assert(data['852236']['Protein ID'].include? 'CAA84864')
         | 
| 43 | 
            +
                  assert_equal 'Entrez Gene', data.key_field
         | 
| 44 | 
            +
                  assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
         | 
| 45 | 
            +
                end
         | 
| 33 46 | 
             
              end
         | 
| 34 47 | 
             
            end
         | 
| 35 48 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: rbbt-sources
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              hash:  | 
| 4 | 
            +
              hash: 15
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
              segments: 
         | 
| 7 7 | 
             
              - 0
         | 
| 8 | 
            -
              - 3 | 
| 9 | 
            -
              - 1 | 
| 10 | 
            -
              version: 0.3.1 | 
| 8 | 
            +
              - 4
         | 
| 9 | 
            +
              - 0
         | 
| 10 | 
            +
              version: 0.4.0
         | 
| 11 11 | 
             
            platform: ruby
         | 
| 12 12 | 
             
            authors: 
         | 
| 13 13 | 
             
            - Miguel Vazquez
         | 
| @@ -15,7 +15,7 @@ autorequire: | |
| 15 15 | 
             
            bindir: bin
         | 
| 16 16 | 
             
            cert_chain: []
         | 
| 17 17 |  | 
| 18 | 
            -
            date: 2011-03- | 
| 18 | 
            +
            date: 2011-03-23 00:00:00 +01:00
         | 
| 19 19 | 
             
            default_executable: 
         | 
| 20 20 | 
             
            dependencies: 
         | 
| 21 21 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| @@ -95,6 +95,7 @@ files: | |
| 95 95 | 
             
            - lib/rbbt/sources/polysearch.rb
         | 
| 96 96 | 
             
            - lib/rbbt/sources/pubmed.rb
         | 
| 97 97 | 
             
            - share/install/Organism/Hsa/Rakefile
         | 
| 98 | 
            +
            - share/install/Organism/Rno/Rakefile
         | 
| 98 99 | 
             
            - share/install/Organism/Sce/Rakefile
         | 
| 99 100 | 
             
            - share/install/Organism/organism_helpers.rb
         | 
| 100 101 | 
             
            - share/install/lib/helpers.rb
         |