exodb 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/exodb.rb +23 -0
- data/lib/exodb/addon/string.rb +139 -0
- data/lib/exodb/constant.rb +64 -0
- data/lib/exodb/datamodel.rb +4 -1
- data/lib/exodb/datamodel/genelocfield.rb +177 -0
- data/lib/exodb/datamodel/generef.rb +193 -0
- data/lib/exodb/datamodel/isoform.rb +237 -0
- data/lib/exodb/datamodel/reference.rb +23 -327
- data/lib/exodb/datamodel/region.rb +7 -5
- data/lib/exodb/datamodel/source.rb +1 -10
- data/lib/exodb/datamodel/variant.rb +14 -81
- data/lib/exodb/datamodel/varlocfield.rb +106 -0
- data/lib/exodb/datamodel/xrefsfield.rb +4 -0
- data/lib/exodb/extra.rb +17 -0
- data/lib/exodb/extra/upload.rb +43 -0
- data/lib/exodb/{utils → extra}/upload_generef.rb +35 -21
- data/lib/exodb/rositza/load.rb +56 -42
- data/lib/exodb/utils.rb +1 -2
- data/lib/exodb/utils/ensemblrest.rb +31 -3
- data/lib/exodb/utils/miriamrest.rb +23 -0
- data/lib/exodb/version.rb +1 -1
- metadata +10 -3
- data/lib/exodb/datamodel/locationfield.rb +0 -116
| @@ -22,7 +22,6 @@ module Exodb | |
| 22 22 | 
             
            	class Gene < Region
         | 
| 23 23 |  | 
| 24 24 | 
             
            		include Mongoid::Versioning
         | 
| 25 | 
            -
            		include Exodb::GenomeLocationField
         | 
| 26 25 |  | 
| 27 26 | 
             
            		field :symbol,				type: String
         | 
| 28 27 | 
             
            		field :loh,					type: Boolean
         | 
| @@ -31,7 +30,8 @@ module Exodb | |
| 31 30 | 
             
            		belongs_to :generef
         | 
| 32 31 | 
             
            		belongs_to :cell
         | 
| 33 32 |  | 
| 34 | 
            -
            		index({'symbol' => 1 | 
| 33 | 
            +
            		index({'symbol' => 1}, background: true)
         | 
| 34 | 
            +
            		
         | 
| 35 35 | 
             
            	end
         | 
| 36 36 |  | 
| 37 37 | 
             
            	class Change
         | 
| @@ -47,12 +47,14 @@ module Exodb | |
| 47 47 | 
             
            	class Onexon < Change
         | 
| 48 48 |  | 
| 49 49 | 
             
            		field :aaposition,			type: Integer # position referenced to the first codon from the longest splice variant
         | 
| 50 | 
            -
            		field : | 
| 50 | 
            +
            		field :refna,				type: String
         | 
| 51 51 | 
             
            		field :refaa,				type: String
         | 
| 52 | 
            -
            		field : | 
| 53 | 
            -
            		field : | 
| 52 | 
            +
            		field :altna,				type: Hash
         | 
| 53 | 
            +
            		field :inhna,				type: Hash
         | 
| 54 54 | 
             
            		field :isoform,				type: Array
         | 
| 55 55 |  | 
| 56 | 
            +
            		index({'aaposition' => 1}, background: true)
         | 
| 57 | 
            +
            		
         | 
| 56 58 | 
             
            	end
         | 
| 57 59 |  | 
| 58 60 | 
             
            	class Aa < Onexon
         | 
| @@ -41,6 +41,7 @@ module Exodb | |
| 41 41 | 
             
            		field :paired,				type: Boolean
         | 
| 42 42 | 
             
            		field :purity,				type: Float
         | 
| 43 43 | 
             
            		field :labels,				type: Hash
         | 
| 44 | 
            +
            		field :metastasis,			type: Boolean
         | 
| 44 45 |  | 
| 45 46 | 
             
            		default_scope ->{where(preferred: true)}
         | 
| 46 47 |  | 
| @@ -109,14 +110,4 @@ module Exodb | |
| 109 110 |  | 
| 110 111 | 
             
            	end
         | 
| 111 112 |  | 
| 112 | 
            -
            	class Tumor < Cell
         | 
| 113 | 
            -
            		
         | 
| 114 | 
            -
            		field :metastasis,			type: Boolean
         | 
| 115 | 
            -
            		
         | 
| 116 | 
            -
            	end
         | 
| 117 | 
            -
            	
         | 
| 118 | 
            -
            	class Normal < Cell
         | 
| 119 | 
            -
            		
         | 
| 120 | 
            -
            	end
         | 
| 121 | 
            -
            	
         | 
| 122 113 | 
             
            end
         | 
| @@ -17,17 +17,14 @@ module Exodb | |
| 17 17 | 
             
            		include Mongoid::Versioning
         | 
| 18 18 | 
             
            		include Mongoid::Timestamps
         | 
| 19 19 |  | 
| 20 | 
            -
            		include Exodb:: | 
| 20 | 
            +
            		include Exodb::VarLocationField
         | 
| 21 21 |  | 
| 22 22 | 
             
            		#max_versions 5
         | 
| 23 23 |  | 
| 24 | 
            -
            		#PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
         | 
| 25 | 
            -
            		#SILENTSIGN = '='
         | 
| 26 | 
            -
            		
         | 
| 27 24 | 
             
            		field :oid,					type: String # chromosome:position..alternative:samplename
         | 
| 28 25 | 
             
            		field :reference,			type: String #reference genotype
         | 
| 29 | 
            -
            		field : | 
| 30 | 
            -
            		field : | 
| 26 | 
            +
            		field :genotypet,			type: Array # genotype from tumor cell
         | 
| 27 | 
            +
            		field :genotypen,			type: Array # genotype from normal cell
         | 
| 31 28 | 
             
            		field :filter,				type: String
         | 
| 32 29 | 
             
            		field :passfilter,			type: Boolean #, default: false
         | 
| 33 30 | 
             
            		field :somstatus,			type: String #unknown, inherited, somatic
         | 
| @@ -39,16 +36,17 @@ module Exodb | |
| 39 36 | 
             
            		field :qualt,				type: String #pileup quality from tumor cell
         | 
| 40 37 | 
             
            		field :qualn,				type: String #pileup quality from normal cell
         | 
| 41 38 | 
             
            		field :predicted_damage,	type: Boolean #Temporaly field
         | 
| 42 | 
            -
            		field : | 
| 39 | 
            +
            		field :evidence,			type: Array # experimental evidence of the variant
         | 
| 40 | 
            +
            		field :temp,				type: Hash #Temporaly field
         | 
| 43 41 |  | 
| 44 42 | 
             
            		belongs_to :cell
         | 
| 45 43 | 
             
            		belongs_to :aacid
         | 
| 46 44 |  | 
| 47 45 | 
             
            		validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"
         | 
| 48 46 |  | 
| 49 | 
            -
            		 | 
| 50 | 
            -
             | 
| 51 | 
            -
            		 | 
| 47 | 
            +
            		index({oid: 1, reference: 1, alternatet: 1, passfilter: 1, somstatus: 1, somscore: 1, inhscore: 1, predicted_damage: 1}, background: true)
         | 
| 48 | 
            +
            		
         | 
| 49 | 
            +
            		before_save :update_oid
         | 
| 52 50 |  | 
| 53 51 | 
             
            		# add this variant to original cell sample
         | 
| 54 52 | 
             
            		# 
         | 
| @@ -64,68 +62,10 @@ module Exodb | |
| 64 62 | 
             
            				#output.puts "#EXODB:ERRO Cannot find dataset by #{str}." if $0 == 'pry'
         | 
| 65 63 | 
             
            			end
         | 
| 66 64 |  | 
| 67 | 
            -
            			self.oid = "#{self.location_str}:#{sample.first().oid}"
         | 
| 68 | 
            -
            			
         | 
| 69 65 | 
             
            		end
         | 
| 70 66 |  | 
| 71 67 | 
             
            		alias_method :add_to_cell, :add_to_sample
         | 
| 72 68 |  | 
| 73 | 
            -
            		## overwrite default assignment of pileuplinet
         | 
| 74 | 
            -
            		## automatically update :reference, :alternatet, and location
         | 
| 75 | 
            -
            		## 
         | 
| 76 | 
            -
            		## @param [String] pile-up line from mpileup
         | 
| 77 | 
            -
            		#def pileuplinet=(pileupline)
         | 
| 78 | 
            -
            		#	begin
         | 
| 79 | 
            -
            		#		if pileupline.is_a?(Bio::DB::Pileup)
         | 
| 80 | 
            -
            		#			pile = pileupline
         | 
| 81 | 
            -
            		#		else
         | 
| 82 | 
            -
            		#			pile = Bio::DB::Pileup.new(pileupline.chomp)
         | 
| 83 | 
            -
            		#		end
         | 
| 84 | 
            -
            		#		
         | 
| 85 | 
            -
            		#		self[:reference] = pile.ref_base
         | 
| 86 | 
            -
            		#		self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
         | 
| 87 | 
            -
            		#		self[:alternatet] = pile.genotype_list
         | 
| 88 | 
            -
            		#		self[:pileuplinet] = pileupline
         | 
| 89 | 
            -
            		#	rescue
         | 
| 90 | 
            -
            		#		self[:pileuplinet] = pileupline
         | 
| 91 | 
            -
            		#	end
         | 
| 92 | 
            -
            		#end
         | 
| 93 | 
            -
            		#
         | 
| 94 | 
            -
            		## overwrite default assignment of pileuplinen
         | 
| 95 | 
            -
            		## automatically update :alternaten
         | 
| 96 | 
            -
            		## 
         | 
| 97 | 
            -
            		## @param [String] pile-up line from mpileup
         | 
| 98 | 
            -
            		#def pileuplinen=(pileupline)
         | 
| 99 | 
            -
            		#	begin
         | 
| 100 | 
            -
            		#		if pileupline.is_a?(Bio::DB::Pileup)
         | 
| 101 | 
            -
            		#			pile = pileupline
         | 
| 102 | 
            -
            		#		else
         | 
| 103 | 
            -
            		#			pile = Bio::DB::Pileup.new(pileupline.chomp)
         | 
| 104 | 
            -
            		#		end
         | 
| 105 | 
            -
            		#		
         | 
| 106 | 
            -
            		#		if self[:reference] == pile.ref_base
         | 
| 107 | 
            -
            		#			self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
         | 
| 108 | 
            -
            		#			self[:alternaten] = pile.genotype_list
         | 
| 109 | 
            -
            		#			self[:pileuplinen] = pileupline
         | 
| 110 | 
            -
            		#		end
         | 
| 111 | 
            -
            		#	rescue
         | 
| 112 | 
            -
            		#		self[:pileuplinen] = pileupline
         | 
| 113 | 
            -
            		#	end
         | 
| 114 | 
            -
            		#end
         | 
| 115 | 
            -
            		#
         | 
| 116 | 
            -
            		## overwrite default read of pileuplinet
         | 
| 117 | 
            -
            		## 
         | 
| 118 | 
            -
            		## @return [Bio::DB::Pileup] pile-up object of tumor sample
         | 
| 119 | 
            -
            		#def pileuplinet()
         | 
| 120 | 
            -
            		#	return Bio::DB::Pileup.new(self[:pileuplinet])
         | 
| 121 | 
            -
            		#end
         | 
| 122 | 
            -
            		#
         | 
| 123 | 
            -
            		## overwrite default read of pileuplinet
         | 
| 124 | 
            -
            		## 
         | 
| 125 | 
            -
            		## @return [Bio::DB::Pileup] pile-up object of normal sample
         | 
| 126 | 
            -
            		#def pileuplinen()
         | 
| 127 | 
            -
            		#	return Bio::DB::Pileup.new(self[:pileuplinen])
         | 
| 128 | 
            -
            		#end
         | 
| 129 69 |  | 
| 130 70 | 
             
            		# apply filter to the variant
         | 
| 131 71 | 
             
            		# the result is kept in passfilter
         | 
| @@ -134,23 +74,16 @@ module Exodb | |
| 134 74 | 
             
            		def apply_filter(filter)
         | 
| 135 75 | 
             
            			self[:filter] = filter
         | 
| 136 76 | 
             
            		end
         | 
| 137 | 
            -
            	end
         | 
| 138 | 
            -
            	
         | 
| 139 | 
            -
            	class SNV < Variant
         | 
| 140 77 |  | 
| 141 | 
            -
            		 | 
| 142 | 
            -
             | 
| 143 | 
            -
            			
         | 
| 78 | 
            +
            		def alternate
         | 
| 79 | 
            +
            			return self[:genotypet] - [self[:reference]]
         | 
| 144 80 | 
             
            		end
         | 
| 145 81 |  | 
| 146 | 
            -
             | 
| 147 | 
            -
            	
         | 
| 148 | 
            -
            	class Indel < Variant #
         | 
| 149 | 
            -
            		
         | 
| 150 | 
            -
            	end
         | 
| 151 | 
            -
            	
         | 
| 152 | 
            -
            	class CNV < Variant
         | 
| 82 | 
            +
            		protected
         | 
| 153 83 |  | 
| 84 | 
            +
            		def update_oid
         | 
| 85 | 
            +
            			self.oid = "#{self.location_str}:#{self.location['assembly']}:#{self.cell.oid}"
         | 
| 86 | 
            +
            		end
         | 
| 154 87 | 
             
            	end
         | 
| 155 88 |  | 
| 156 89 | 
             
            end
         | 
| @@ -0,0 +1,106 @@ | |
| 1 | 
            +
            # 
         | 
| 2 | 
            +
            # Exodb
         | 
| 3 | 
            +
            # Copyright (C) 2014
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            # author: Natapol Pornputtapong <natapol.por@gmail.com>
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            module Exodb
         | 
| 13 | 
            +
                
         | 
| 14 | 
            +
            	module VarLocationField
         | 
| 15 | 
            +
            		
         | 
| 16 | 
            +
            		extend ActiveSupport::Concern
         | 
| 17 | 
            +
            		
         | 
| 18 | 
            +
            		included do
         | 
| 19 | 
            +
            			field :location,		type: Hash #{chr: '', position: x, assembly: x}
         | 
| 20 | 
            +
            			field :convlocation,	type: Array
         | 
| 21 | 
            +
            			
         | 
| 22 | 
            +
            			validates_presence_of :location
         | 
| 23 | 
            +
            			
         | 
| 24 | 
            +
            			index({location: 1}, background: true)
         | 
| 25 | 
            +
            			
         | 
| 26 | 
            +
            		end
         | 
| 27 | 
            +
            		
         | 
| 28 | 
            +
            		module ClassMethods
         | 
| 29 | 
            +
            			
         | 
| 30 | 
            +
            			
         | 
| 31 | 
            +
            		end
         | 
| 32 | 
            +
            		
         | 
| 33 | 
            +
            		# get the chromosome
         | 
| 34 | 
            +
            		#
         | 
| 35 | 
            +
            		# @return [String] chromosome, i.e. the 'chr' entry of the location hash
         | 
| 36 | 
            +
            		def chromosome
         | 
| 37 | 
            +
            			self[:location]['chr']
         | 
| 38 | 
            +
            		end
         | 
| 39 | 
            +
            		
         | 
| 40 | 
            +
            		# Parse a location string in the format chromosome:position[:assembly] into a location hash
         | 
| 41 | 
            +
            		#
         | 
| 42 | 
            +
            		# @param [String] location string in the format chromosome:position[:assembly]; the default assembly is used when none is given
         | 
| 43 | 
            +
            		def parse_locstr(loc_str)
         | 
| 44 | 
            +
            			
         | 
| 45 | 
            +
            			dat = loc_str.split(/:/)
         | 
| 46 | 
            +
            			
         | 
| 47 | 
            +
            			return {'chr' => dat[0],
         | 
| 48 | 
            +
            				'position' => dat[1].to_i,
         | 
| 49 | 
            +
            				'assembly' => dat[2] ? Exodb::ASSEMBLY.has_key?(dat[2]) ? Exodb::ASSEMBLY[dat[2]] : dat[2] : Exodb::DEFAULTASSEMBLY
         | 
| 50 | 
            +
            			}
         | 
| 51 | 
            +
            			
         | 
| 52 | 
            +
            		end
         | 
| 53 | 
            +
            		
         | 
| 54 | 
            +
            		# Return location from specific genome assembly
         | 
| 55 | 
            +
            		#
         | 
| 56 | 
            +
            		# @param [String] assembly version
         | 
| 57 | 
            +
            		#
         | 
| 58 | 
            +
            		# @return [String] location string in chromosome:position:assembly format, or nil if no location matches the requested assembly
         | 
| 59 | 
            +
            		def location_str(assembly = nil)
         | 
| 60 | 
            +
            			
         | 
| 61 | 
            +
            			result = nil
         | 
| 62 | 
            +
            			
         | 
| 63 | 
            +
            			if assembly == nil || Exodb::ASSEMBLY[assembly] == self[:location]['assembly']
         | 
| 64 | 
            +
            				result = "#{self[:location]['chr']}:#{self[:location]['position']}:#{self[:location]['assembly']}"
         | 
| 65 | 
            +
            			else
         | 
| 66 | 
            +
            				self[:convlocation].each {|e| result = "#{[e['chr'], e['position'], e['assembly']].join(':')}" if e['assembly'] == Exodb::ASSEMBLY[assembly]}
         | 
| 67 | 
            +
            			end
         | 
| 68 | 
            +
            			
         | 
| 69 | 
            +
            			return result
         | 
| 70 | 
            +
            		end
         | 
| 71 | 
            +
            		
         | 
| 72 | 
            +
            		# Assign location
         | 
| 73 | 
            +
            		#
         | 
| 74 | 
            +
            		# @param [String] location string in chromosome:position[:assembly] format (non-String values are ignored)
         | 
| 75 | 
            +
            		def location=(loc)
         | 
| 76 | 
            +
            			
         | 
| 77 | 
            +
            			if loc.is_a?(String)
         | 
| 78 | 
            +
            				
         | 
| 79 | 
            +
            				begin
         | 
| 80 | 
            +
            					self[:location] = parse_locstr(loc).delete_if {|k, v| ['start', 'stop'].include?(k)}
         | 
| 81 | 
            +
            				rescue
         | 
| 82 | 
            +
            					
         | 
| 83 | 
            +
            				end
         | 
| 84 | 
            +
            				
         | 
| 85 | 
            +
            			end
         | 
| 86 | 
            +
            		end
         | 
| 87 | 
            +
            		
         | 
| 88 | 
            +
            		# Append a converted location to the convlocation list
         | 
| 89 | 
            +
            		#
         | 
| 90 | 
            +
            		# @param [String] location string in chromosome:position[:assembly] format (non-String values are ignored)
         | 
| 91 | 
            +
            		def convlocation=(loc)
         | 
| 92 | 
            +
            			
         | 
| 93 | 
            +
            			if loc.is_a?(String)
         | 
| 94 | 
            +
            				
         | 
| 95 | 
            +
            				begin
         | 
| 96 | 
            +
            					self[:convlocation].push(parse_locstr(loc).delete_if {|k, v| ['start', 'stop'].include?(k)})
         | 
| 97 | 
            +
            				rescue
         | 
| 98 | 
            +
            					
         | 
| 99 | 
            +
            				end
         | 
| 100 | 
            +
            				
         | 
| 101 | 
            +
            			end
         | 
| 102 | 
            +
            			
         | 
| 103 | 
            +
            		end
         | 
| 104 | 
            +
            	end
         | 
| 105 | 
            +
                
         | 
| 106 | 
            +
            end
         | 
    
        data/lib/exodb/extra.rb
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            # 
         | 
| 2 | 
            +
            # Exodb
         | 
| 3 | 
            +
            # Copyright (C) 2014
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            # author: Natapol Pornputtapong <natapol.por@gmail.com>
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            require 'exodb/extra/upload_generef.rb'
         | 
| 13 | 
            +
            require 'exodb/extra/upload.rb'
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            module Exodb
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            end
         | 
| @@ -0,0 +1,43 @@ | |
| 1 | 
            +
            # 
         | 
| 2 | 
            +
            # Exodus
         | 
| 3 | 
            +
            # Copyright (C) 2014
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            # author: Natapol Pornputtapong <natapol.por@gmail.com>
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            module Exodb
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            	class Mapping
         | 
| 16 | 
            +
            		
         | 
| 17 | 
            +
            		def self.load_assembly_mapping!(from, to)
         | 
| 18 | 
            +
            			
         | 
| 19 | 
            +
            			self.where(from: Exodb::assembly(from), to: Exodb::assembly(to))
         | 
| 20 | 
            +
            			
         | 
| 21 | 
            +
            			Exodb::Chrref.where({:assembly => Exodb::assembly(from), 'location.chr' => /\Achr/}).each do |chr|
         | 
| 22 | 
            +
            				
         | 
| 23 | 
            +
            				Exodb::Ensembl::REST.assembly_map(chr.locstr =~ /\Achr/ ? chr.locstr[3..-1] : chr.locstr,  Exodb::assembly(to),  Exodb::assembly(from))["mappings"].each do |map|
         | 
| 24 | 
            +
            					
         | 
| 25 | 
            +
            					#map = {"original"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235674528, "start"=>235669016, "assembly"=>"GRCh37"}, "mapped"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235511225, "start"=>235505713, "assembly"=>"GRCh38"}}
         | 
| 26 | 
            +
            					
         | 
| 27 | 
            +
            					mapping = self.new()
         | 
| 28 | 
            +
            					mapping.chr = "#{map['original']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['original']["seq_region_name"]}"
         | 
| 29 | 
            +
            					mapping.start = map['original']["start"]
         | 
| 30 | 
            +
            					mapping.stop = map['original']["end"]
         | 
| 31 | 
            +
            					mapping.from = map['original']["assembly"]
         | 
| 32 | 
            +
            					mapping.tchr = "#{map['mapped']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['mapped']["seq_region_name"]}"
         | 
| 33 | 
            +
            					mapping.tstart = map['mapped']["start"]
         | 
| 34 | 
            +
            					mapping.tstop = map['mapped']["end"]
         | 
| 35 | 
            +
            					mapping.coeff = map['mapped']["strand"]
         | 
| 36 | 
            +
            					mapping.to = map['mapped']["assembly"]
         | 
| 37 | 
            +
            					
         | 
| 38 | 
            +
            					p mapping.save!
         | 
| 39 | 
            +
            				end
         | 
| 40 | 
            +
            			end
         | 
| 41 | 
            +
            		end
         | 
| 42 | 
            +
            	end
         | 
| 43 | 
            +
            end
         | 
| @@ -17,10 +17,11 @@ module Exodb | |
| 17 17 | 
             
            		module_function
         | 
| 18 18 |  | 
| 19 19 | 
             
            		# Upload gene information to database using gff3 and genome sequence fasta file
         | 
| 20 | 
            +
            		# Exodb::Utils.upload_generef_from_gff3('ref_GRCh37.p5_top_level.gff3')
         | 
| 20 21 | 
             
            		#
         | 
| 21 22 | 
             
            		# @param [String] gff3 file
         | 
| 22 23 | 
             
            		# @param [String] assembly name [default: gff file name]
         | 
| 23 | 
            -
            		def upload_generef_from_gff3(filename, assembly =  | 
| 24 | 
            +
            		def upload_generef_from_gff3(filename, assembly = Exodb::DEFAULTASSEMBLY)
         | 
| 24 25 |  | 
| 25 26 | 
             
            			gff = Bio::GFF::GFF3.new(File.open(filename).read)
         | 
| 26 27 |  | 
| @@ -45,12 +46,12 @@ module Exodb | |
| 45 46 | 
             
            				end
         | 
| 46 47 | 
             
            			end
         | 
| 47 48 |  | 
| 48 | 
            -
            			assembly = assembly ?  | 
| 49 | 
            +
            			assembly = assembly.blank? ? Exodb::DEFAULTASSEMBLY : Exodb::ASSEMBLY[assembly.downcase]
         | 
| 49 50 |  | 
| 50 51 | 
             
            			regions = {}
         | 
| 51 52 | 
             
            			genes = {}
         | 
| 52 53 | 
             
            			seq = {}
         | 
| 53 | 
            -
            			
         | 
| 54 | 
            +
            			regions.default='chr0'
         | 
| 54 55 |  | 
| 55 56 | 
             
            			gff.records.each do |e|
         | 
| 56 57 |  | 
| @@ -59,10 +60,16 @@ module Exodb | |
| 59 60 | 
             
            					e.attributes.each do |attr|
         | 
| 60 61 | 
             
            						case attr[0]
         | 
| 61 62 | 
             
            						when 'chromosome'
         | 
| 62 | 
            -
            							regions[e.seqname] =  | 
| 63 | 
            +
            							regions[e.seqname] = e.seqname =~ /\ANC_/ ? "chr#{attr[1]}" : e.seqname
         | 
| 63 64 | 
             
            						end
         | 
| 64 65 | 
             
            					end
         | 
| 65 66 |  | 
| 67 | 
            +
            					chr = Exodb::Chrref.new()
         | 
| 68 | 
            +
            					chr.location=("#{regions.has_key?(e.seqname) ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}:#{assembly}")
         | 
| 69 | 
            +
            					chr.oid = "#{chr.chr}:#{assembly}"
         | 
| 70 | 
            +
            					chr.add_to_set(:xrefs, guess_miriam(e.seqname))
         | 
| 71 | 
            +
            					chr.save!
         | 
| 72 | 
            +
            					
         | 
| 66 73 | 
             
            					if File.exist?("./genome/#{e.seqname}.fa")
         | 
| 67 74 | 
             
            						seq = {}
         | 
| 68 75 | 
             
            						Bio::FlatFile.open(Bio::FastaFormat, "./genome/#{e.seqname}.fa").each {|fasta| seq[fasta.acc_version] = fasta.to_seq}
         | 
| @@ -70,7 +77,7 @@ module Exodb | |
| 70 77 |  | 
| 71 78 | 
             
            				when 'gene', 'tRNA'
         | 
| 72 79 |  | 
| 73 | 
            -
            					gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{ | 
| 80 | 
            +
            					gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", childs: [], exon: [], cds: []}
         | 
| 74 81 |  | 
| 75 82 | 
             
            					e.attributes.each do |attr|
         | 
| 76 83 | 
             
            						case attr[0]
         | 
| @@ -85,12 +92,14 @@ module Exodb | |
| 85 92 | 
             
            						end
         | 
| 86 93 | 
             
            					end
         | 
| 87 94 |  | 
| 88 | 
            -
            					gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
         | 
| 89 | 
            -
            					gene[: | 
| 95 | 
            +
            					gene[:sequence] = seq[e.seqname].subseq(e.start.to_i - Exodb::Generef.expanding, e.end.to_i + Exodb::Generef.expanding).to_s if seq.has_key?(e.seqname)
         | 
| 96 | 
            +
            					gene[:seqstart] = e.start.to_i - Exodb::Generef.expanding
         | 
| 97 | 
            +
            					gene[:seqstop] = e.end.to_i + Exodb::Generef.expanding
         | 
| 98 | 
            +
            					gene[:oid] = "#{gene[:location]}:#{assembly}"
         | 
| 90 99 | 
             
            					genes[gene[:id]] = gene
         | 
| 91 100 |  | 
| 92 101 | 
             
            				when /\A(transcript|[^t]*RNA)/
         | 
| 93 | 
            -
            					rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}
         | 
| 102 | 
            +
            					rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", exon: [], cds: []}
         | 
| 94 103 |  | 
| 95 104 | 
             
            					e.attributes.each do |attr|
         | 
| 96 105 | 
             
            						case attr[0]
         | 
| @@ -125,20 +134,18 @@ module Exodb | |
| 125 134 | 
             
            				end
         | 
| 126 135 | 
             
            			end
         | 
| 127 136 |  | 
| 128 | 
            -
            			 | 
| 129 | 
            -
             | 
| 137 | 
            +
            			count = {succ: 0, fail: 0}
         | 
| 138 | 
            +
            			
         | 
| 139 | 
            +
            			genes.each_pair do |id, entry|
         | 
| 140 | 
            +
            				if entry[:type] == 'gene'
         | 
| 130 141 |  | 
| 131 | 
            -
            					gene = Generef.new()
         | 
| 132 | 
            -
            					gene.oid = v[:oid] if v.has_key?(:oid)
         | 
| 133 | 
            -
            					gene.xrefs = v[:xrefs]
         | 
| 134 | 
            -
            					gene.parse_location(v[:location])
         | 
| 135 | 
            -
            					gene.chrrefseq = v[:chrrefseq]
         | 
| 136 | 
            -
            					gene.strand = v[:strand]
         | 
| 137 | 
            -
            					gene.psuedo = v[:psuedo] if v[:psuedo]
         | 
| 138 | 
            -
            					gene.genomeref = assembly
         | 
| 139 | 
            -
            					gene.sequence = v[:sequence] if v.has_key?(:sequence)
         | 
| 142 | 
            +
            					gene = Exodb::Generef.new()
         | 
| 140 143 |  | 
| 141 | 
            -
            					 | 
| 144 | 
            +
            					entry.each_pair do |k, v|
         | 
| 145 | 
            +
            						gene.method(:"#{k}=").call(v) if ![:type, :childs, :exon, :cds].include?(k)
         | 
| 146 | 
            +
            					end
         | 
| 147 | 
            +
            					
         | 
| 148 | 
            +
            					entry[:childs].each do |child|
         | 
| 142 149 |  | 
| 143 150 | 
             
            						rna = Isoform.new()
         | 
| 144 151 | 
             
            						data = genes[child]
         | 
| @@ -150,12 +157,19 @@ module Exodb | |
| 150 157 |  | 
| 151 158 | 
             
            					end
         | 
| 152 159 |  | 
| 153 | 
            -
            					 | 
| 160 | 
            +
            					if gene.save!
         | 
| 161 | 
            +
            						count[:succ] += 1
         | 
| 162 | 
            +
            						Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
         | 
| 163 | 
            +
            					else
         | 
| 164 | 
            +
            						count[:fail] += 1
         | 
| 165 | 
            +
            						Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
         | 
| 166 | 
            +
            					end
         | 
| 154 167 |  | 
| 155 168 | 
             
            				end
         | 
| 156 169 |  | 
| 157 170 | 
             
            			end
         | 
| 158 171 |  | 
| 172 | 
            +
            			Exodb::putst "SUCCESS: #{count[:succ]} , FAIL: #{count[:fail]}"
         | 
| 159 173 |  | 
| 160 174 | 
             
            		end
         | 
| 161 175 | 
             
            	end
         |