RubyGems - exodb - Versions diffs - 0.1.2 → 0.1.3 - Mend

exodb 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/lib/exodb.rb +23 -0
data/lib/exodb/addon/string.rb +139 -0
data/lib/exodb/constant.rb +64 -0
data/lib/exodb/datamodel.rb +4 -1
data/lib/exodb/datamodel/genelocfield.rb +177 -0
data/lib/exodb/datamodel/generef.rb +193 -0
data/lib/exodb/datamodel/isoform.rb +237 -0
data/lib/exodb/datamodel/reference.rb +23 -327
data/lib/exodb/datamodel/region.rb +7 -5
data/lib/exodb/datamodel/source.rb +1 -10
data/lib/exodb/datamodel/variant.rb +14 -81
data/lib/exodb/datamodel/varlocfield.rb +106 -0
data/lib/exodb/datamodel/xrefsfield.rb +4 -0
data/lib/exodb/extra.rb +17 -0
data/lib/exodb/extra/upload.rb +43 -0
data/lib/exodb/{utils → extra}/upload_generef.rb +35 -21
data/lib/exodb/rositza/load.rb +56 -42
data/lib/exodb/utils.rb +1 -2
data/lib/exodb/utils/ensemblrest.rb +31 -3
data/lib/exodb/utils/miriamrest.rb +23 -0
data/lib/exodb/version.rb +1 -1
metadata +10 -3
data/lib/exodb/datamodel/locationfield.rb +0 -116

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 90ae57a91a48821a343365bcf8d3ab1ca070b46b
-  data.tar.gz: 7d3ab209827f0e05aab2e26378da2249cd627978
+  metadata.gz: 65c5be33ab0ce6072c4a277fd63004603f7ad0e6
+  data.tar.gz: 75641b1dd0443b9424274f45376a0ac90db3c524
 SHA512:
-  metadata.gz: 48ea76c714a19929590b2ebf1ccf454c2ef3be9824bef4e872b42b698e9f63d635d5c737799f1cfd368a1c5c294199877ab11b64b1f242e4b1605ff28e328924
-  data.tar.gz: 954b14a7fda1e2eaa8c2e1cd254116bf64b8ab08c55d1719718a59bc4efeb66272c17f82d60712d36056b2f7567f3bac6d81ba3ba8e71ec1f4921b7b99860eea
+  metadata.gz: cb6dc455d74085166dc9bb0f8b34e8d658a729110ec2f151215f085a7e59e11152564159404202001b3e8b8c42985525321e8a910f5d2cba964fbbfbc2e68565
+  data.tar.gz: d5daaf36ae983c48994160ac413ca03059348e9dedeca2e36be510d8a02cfdc73e8282d992323a6bf7c743bb59bce95639360421429e0abd9196ba2e65a56c7f

data/lib/exodb.rb CHANGED

@@ -36,12 +36,35 @@ require 'exodb/dbconnection.rb'
 require 'exodb/usermanage.rb'
 require 'exodb/datamodel.rb'
 require 'exodb/exception.rb'
+require 'exodb/constant.rb'
 require 'exodb/utils.rb'
 require 'exodb/addon.rb'
+require 'exodb/extra.rb'
 module Exodb
+	@@verbose = true
 	module_function
+	def verbose()
+		@@verbose = true
+	end
+	def noverbose()
+		@@verbose = false
+	end
+	def putstv(str)
+		putst(str) if @@verbose == true
+	end
+	def putst(str)
+		puts "Exodb:STATUS #{str}"
+	end
+	def assembly(str)
+		return Exodb::ASSEMBLY[str.downcase]
+	end
 end

data/lib/exodb/addon/string.rb CHANGED

@@ -11,6 +11,8 @@
 class String
+	# For miriam
 	def is_miriam?
 		return self =~ /^urn:miriam:/
 	end
@@ -23,4 +25,141 @@ class String
 		return self.is_miriam? ? self.split(':', 4)[2] : ''
 	end
+	def resolve
+	end
+	# For HGVS
+	def is_hgvs?
+		return self =~ Exodb::HGVPATTERN
+	end
+	def parse_hgvs
+		result = {}
+		Exodb::HGVPATTERN.match(self) do |m|
+			if m[1] =~ /^chr/
+				ref = m[1].split(/\./)
+				result[:chr] = ref[0]
+				result[:assembly] = ref[1].blank? ? Exodb::DEFAULTASSEMBLY : Exodb.assembly(ref[1])
+			elsif m[1] =~ /^(\d{0,2}|[MXY])\./
+				ref = m[1].split(/\./)
+				result[:chr] = "chr#{ref[0]}"
+				result[:assembly] = ref[1].blank? ? Exodb::DEFAULTASSEMBLY : Exodb.assembly(ref[1])
+			else
+				result[:chrrefseq] = m[1]
+			end
+			pos = m[2].split(/_/).sort
+			result[:pos] = pos[0].to_i
+			result[:start] = pos[0].to_i
+			result[:stop] = pos[1].blank? ? pos[0].to_i : pos[1].to_i
+			case m[3]
+			when /^ins/
+				result[:type] = 'ins'
+				result[:alt] = m[3][3..-1]
+			when /^del/
+				result[:type] = 'del'
+				result[:alt] = m[3][3..-1]
+			else
+				result[:type] = 'sub'
+				result[:alt] = m[3].split(/\>/)[1]
+			end
+		end
+		return result
+	end
+	# For Pileup string
+	def count_allele
+		allellset = {
+			'A' => "aA",
+			'T' => "tT",
+			'C' => "cC",
+			'G' => "gG",
+			'.' => "\\.\\,",
+			'*' => "\\*"
+		}
+		tmpstr = self.dup
+		allele = {}
+		self.scan(/([+-])(\d+)([ATCGatcg]+)/) do |a, b, c|
+			pattern = "#{a}#{b}#{c[0,(b.to_i)]}".upcase
+			if !allele.has_key?(pattern)
+				allele[pattern] = 0
+				tmpstr.gsub!(/#{"#{b}#{c[0,(b.to_i)]}"}/, '')
+			end
+			allele[pattern] += 1
+		end
+		allellset.each_pair do |k, v|
+			allele[k] = tmpstr.count(v) if tmpstr.count(v) > 0
+		end
+		return allele
+	end
+	# For pileup var
+	def is_pileup_var?
+		dat = self.split(/\//)
+		return dat[0].is_loc? && dat[1] =~ /[\+\-]?[ATCG]+/
+	end
+	def parse_pileup_var
+		result = {}
+		if self.is_pileup_var?
+			dat = self.split(/\//)
+			result = dat[0].parse_loc
+			dat[1] =~ /([\+\-]?)([ATCG]+)/
+			result[:type] = case $1
+			when '+'
+				'ins'
+			when '-'
+				'del'
+			else
+				'sub'
+			end
+			result[:alt] = dat[1]
+		end
+		return result
+	end
+	# Location String
+	def is_loc?
+		return /^\w+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
+	end
+	# For quality string
+	# Parse a location string in format of chromosome:start..stop, chromosome:start-stop or chromosome:position (optionally followed by :assembly)
+	#
+	# @return [Hash] location with 'chr', 'start', 'pos', 'stop' and 'assembly' keys
+	def parse_loc
+		if self =~ /^[^:]+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
+			dat = self.split(/:/)
+			pos = []
+			dat[1].split(/\.\.|-/).each {|e| pos.push(e.to_i)}
+			pos.sort!
+			return {'chr' => dat[0], 'start' => pos[0], 'pos' => pos[0], 'stop' => pos[1] ? pos[1] : pos[0], 'assembly' => dat[2] ? Exodb::assembly(dat[2]) : Exodb::DEFAULTASSEMBLY}
+		else
+			raise
+		end
+	end
 end

data/lib/exodb/constant.rb ADDED

@@ -0,0 +1,64 @@
+#
+# Exodus
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+module Exodb
+	NAIUPAC = {
+		'Y'	=> 'CT',
+		'R'	=> 'AG',
+		'W'	=> 'AT',
+		'S'	=> 'CG',
+		'K'	=> 'GT',
+		'M'	=> 'AC',
+		'B'	=> 'CGT',
+		'D'	=> 'AGT',
+		'H'	=> 'ACT',
+		'V'	=> 'ACG',
+		'N'	=> 'ACGT',
+		'A'	=> 'A',
+		'T'	=> 'T',
+		'G'	=> 'G',
+		'C'	=> 'C',
+		'U'	=> 'U',
+		'CT' => 'Y',
+		'AG' => 'R',
+		'AT' => 'W',
+		'CG' => 'S',
+		'GT' => 'K',
+		'AC' => 'M',
+		'CGT' => 'B',
+		'AGT' => 'D',
+		'ACT' => 'H',
+		'ACG' => 'V',
+		'ACGT' => 'N'
+	}
+	ASSEMBLY = {
+		'hg19' => 'GRCh37',
+		'hg38' => 'GRCh38',
+		'GRCh37' => 'GRCh37',
+		'GRCh38' => 'GRCh38',
+		'grch37' => 'GRCh37',
+		'grch38' => 'GRCh38'
+	}
+	DEFAULTASSEMBLY = 'GRCh37'
+	LATESTASSEMBLY = 'GRCh38'
+	HGVPATTERN = /^([^:]+):g\.([\-_\d]+)([ATGC]>[ATCG]|del[ATCG]*|ins[ATCG]*)$/
+end

data/lib/exodb/datamodel.rb CHANGED

@@ -11,9 +11,12 @@
 require 'mongoid'
-require 'exodb/datamodel/locationfield.rb'
+require 'exodb/datamodel/genelocfield.rb'
+require 'exodb/datamodel/varlocfield.rb'
 require 'exodb/datamodel/xrefsfield.rb'
 require 'exodb/datamodel/variant.rb'
 require 'exodb/datamodel/reference.rb'
+require 'exodb/datamodel/generef.rb'
+require 'exodb/datamodel/isoform.rb'
 require 'exodb/datamodel/region.rb'
 require 'exodb/datamodel/source.rb'

data/lib/exodb/datamodel/genelocfield.rb ADDED

@@ -0,0 +1,177 @@
+#
+# Exodb
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+module Exodb
+	module GeneLocationField
+		extend ActiveSupport::Concern
+		included do
+			field :start,				type: Integer
+			field :stop,				type: Integer
+			field :chr,					type: String
+			field :seqstart,			type: Integer
+			field :seqstop,				type: Integer
+			field :strand,				type: String
+			field :assembly,			type: String
+			field :sequence,			type: String
+			field :chrrefseq,			type: String # refseq id of chromosome
+			validates_presence_of :start, message: "start field missing"
+			validates_presence_of :stop, message: "stop field missing"
+			validates_presence_of :chr, message: "chr field missing"
+			validates_presence_of :assembly, message: "assembly field missing"
+			index({start: 1, stop: 1, chr: 1, assembly: 1}, background: true)
+		end
+		module ClassMethods
+			def where_cover(loc_str)
+				dat = parse_locstr(loc_str)
+				return self.where({chr: dat['chr'], assembly: dat['assembly']}).lte(start: dat['start']).gte(stop: dat['stop'])
+			end
+			def where_intersect(loc_str)
+				dat = parse_locstr(loc_str)
+				return self.where({chr: dat['chr'], assembly: dat['assembly']}).or({:start.lte => dat['start'], :stop.gte => dat['start']}, {:start.lte => dat['stop'], :stop.gte => dat['stop']})
+			end
+			def where_in(loc_str)
+				dat = parse_locstr(loc_str)
+				return self.where({chr: dat['chr'], assembly: dat['assembly']}).gte(start: dat['start']).lte(stop: dat['stop'])
+			end
+			def where_ups_cover(loc_str)
+				#code
+			end
+		end
+		# Download the sequence from a web service. Please use with caution: NCBI will block spamming requests.
+		#
+		def dl_seq!
+			case self.chrrefseq
+			when /\Aurn:miriam:refseq:/
+				self.sequence = Bio::FastaFormat.new(Bio::NCBI::REST.efetch(self.chrrefseq.split(':', 4), {"db"=>"nucleotide", "rettype"=>"fasta", "retmode"=>"text", "seq_start"=>self.start, "seq_stop"=>self.end})).seq
+			else
+				self.sequence = Exodb::Ensembl::REST.sequence_region()
+			end
+			self.save!
+		end
+		# get the start position of the gene on the genome
+		#
+		# @return [Integer] start position of gene
+		def start
+			self[:start]
+		end
+		# get the end position of the gene on the genome
+		#
+		# @return [Integer] end position of gene
+		def stop
+			self[:stop]
+		end
+		# get the begin position of the gene according to its strand (start on the '+' strand, otherwise stop)
+		#
+		# @return [Integer] begin position of gene
+		def begin
+			self[:strand] == '+' ? self[:start] : self[:stop]
+		end
+		# get the end position of the gene according to its strand (stop on the '+' strand, otherwise start)
+		#
+		# @return [Integer] end position of gene
+		def end
+			self[:strand] == '+' ? self[:stop] : self[:start]
+		end
+		# get the chromosome
+		#
+		# @return [String] chromosome
+		def chromosome
+			self[:chr]
+		end
+		# Assign location
+		#
+		# @param [String, Hash] location string in chromosome:start..stop or chromosome:start-stop format
+		def location=(loc)
+			if loc.is_a?(String)
+				begin
+					loc.parse_loc.delete_if {|k, v| k == 'pos'}.each_pair do |k, v|
+						self[k.to_sym] = v
+					end
+				rescue
+				end
+			end
+		end
+		# Return location
+		#
+		# @return [String] location string in chromosome:start..stop format (chromosome:position when start equals stop)
+		def location_str
+			return "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
+		end
+		alias_method :locstr, :location_str
+		# Return gene sequence
+		#
+		# @return [Bio::Sequence] gene sequence
+		def to_seq
+			whole_seq.splice("#{self[:start] - self[:seqstart] + 1}..#{self[:stop] - self[:seqstart] + 1}")
+		end
+		# Return whole deposited sequence
+		#
+		# @return [Bio::Sequence] gene sequence
+		def whole_seq
+			Bio::Sequence::NA.new(self[:sequence])
+		end
+		# join exon or cds position into a string
+		#
+		# @param [Array] input array exon or cds
+		# @param [String] strand '+' for forward read, any other value for complement (defaults to the stored strand)
+		#
+		# @return [String] a string in start..end,start..end,...
+		def get_splice(arr, strand = nil)
+			strand = strand || self[:strand]
+			reducer = self[:seqlocation]['start'] - 1
+			str = []
+			arr.each do |e|
+				str.push("#{e[0] - reducer}..#{e[1] - reducer}")
+			end
+			return strand == '+' ? self.to_seq.splicing("join(#{str.join(',')})") : self.to_seq.splicing("complement(join(#{str.join(',')}))")
+		end
+	end
+end