RubyGems - exodb - Versions diffs - 0.0.2 - Mend

exodb 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +7 -0
data/contributors.txt +1 -0
data/exodb.gemspec +35 -0
data/genome/process_genome_seq.rb +22 -0
data/lib/exodb/addon/string.rb +26 -0
data/lib/exodb/addon.rb +13 -0
data/lib/exodb/datamodel/locationfield.rb +103 -0
data/lib/exodb/datamodel/reference.rb +387 -0
data/lib/exodb/datamodel/region.rb +51 -0
data/lib/exodb/datamodel/source.rb +122 -0
data/lib/exodb/datamodel/variant.rb +89 -0
data/lib/exodb/datamodel/xrefsfield.rb +42 -0
data/lib/exodb/datamodel.rb +19 -0
data/lib/exodb/dbconnection.rb +83 -0
data/lib/exodb/exception.rb +18 -0
data/lib/exodb/usermanage.rb +84 -0
data/lib/exodb/utils/upload_generef.rb +163 -0
data/lib/exodb/utils/upload_var.rb +60 -0
data/lib/exodb/utils.rb +42 -0
data/lib/exodb/vcf.rb +193 -0
data/lib/exodb/version.rb +15 -0
data/lib/exodb.rb +44 -0
data/session.yml +6 -0
metadata +122 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: bd0cec2b7ef4791ab9686827bf8d31409152af2a
+  data.tar.gz: 117c7eb770d5473a76909af778017f2557288982
+SHA512:
+  metadata.gz: 6b7cd7602e7bda8da25c1799e5fbc0fd3221e04397b52a54cb4c1b5b2691b6cfea96d1e114bfc1f218529d747614544876da7eb78d8258ef3ee41a082d22eee2
+  data.tar.gz: 5699679e8fb8834af7c4e7f42ca2f399c549c015b1fa07781240a9a9f76d01a56951ba72af04645254232d588a6c1c9424034f23ec03791098bd57a0e3d5bf3c

data/contributors.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ Natapol Pornputtapong

data/exodb.gemspec ADDED Viewed

@@ -0,0 +1,35 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "exodb/version"
+# Gem specification for exodb. The file, test-file and executable lists are
+# produced by running `git ls-files`, so the gem has to be built from a git checkout.
+Gem::Specification.new do |s|
+    s.name          = 'exodb'
+    s.version       = Exodb::VERSION
+    s.date          = '2014-10-31'
+    s.platform      = Gem::Platform::RUBY
+    s.summary       = "A library for exome sequencing data management development"
+    s.description   = "A library for exome sequencing data management"
+    s.authors       = ["Natapol Pornputtapong"]
+    s.email         = 'natapol.por@gmail.com'
+    # NOTE(review): the homepage names the 'dactyls' gem, not 'exodb' -- looks copied from another gemspec; confirm.
+    s.homepage      = 'http://rubygems.org/gems/dactyls'
+    # NOTE(review): 'GPL' does not say which GPL version applies -- confirm.
+    s.license       = 'GPL'
+#    s.rubyforge_project = "neography"
+    s.files         = `git ls-files`.split("\n")
+    s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+    # executables are listed by base name only (the bin/ prefix is stripped)
+    s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+    s.require_paths = ["lib"]
+    # runtime dependencies
+    s.add_dependency "mongoid", "~> 3.1"
+    s.add_dependency "bio", "~> 1.4"
+    s.add_dependency "highline", "~> 1.6"
+    s.add_dependency "pry", "~> 0.10"
+#    s.add_development_dependency "rspec", ">= 2.11"
+#    s.add_dependency "httpclient", ">= 2.3.3"
+end

data/genome/process_genome_seq.rb ADDED Viewed

@@ -0,0 +1,22 @@
+#
+# Exodus
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+require 'bio'
+Dir.glob('hs_ref_GRCh37*.fa').each do |file|
+	flatfile = Bio::FlatFile.open(Bio::FastaFormat, file)
+	flatfile.each do |e|
+		outfile = File.open("#{e.acc_version}.fa", 'w')
+		outfile.write(e.to_s)
+		outfile.close()
+	end
+end

data/lib/exodb/addon/string.rb ADDED Viewed

@@ -0,0 +1,26 @@
+#
+# Exodus
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+class String
+	def is_miriam?
+		return self =~ /^urn:miriam:/
+	end
+	def id
+		return self.is_miriam? ? self.split(':', 4)[-1] : ''
+	end
+	def namespace
+		return self.is_miriam? ? self.split(':', 4)[2] : ''
+	end
+end

data/lib/exodb/addon.rb ADDED Viewed

@@ -0,0 +1,13 @@
+#
+# Exodus
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+require 'exodb/addon/string.rb'

data/lib/exodb/datamodel/locationfield.rb ADDED Viewed

@@ -0,0 +1,103 @@
+#
+# Exodb
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+module Exodb
+	module LocationField
+		extend ActiveSupport::Concern
+		included do
+			field :location,		type: Hash #{chromosome: '', start: x, stop: x}
+			index({location: 1}, background: true)
+		end
+		module ClassMethods
+			def cover?(loc_str)
+				dat = loc_str.split(/(:|\.\.)/)
+				if dat[4]
+					querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[4].to_i}
+				else
+					querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}
+				end
+				return self.where(querystr)
+			end
+			def intersect?(loc_str)
+				dat = loc_str.split(/(:|\.\.)/)
+				querystr = {:'$or' => [{:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}, {:'location.chromosome' => dat[0], :'location.start'.lte => dat[4].to_i, :'location.stop'.gte => dat[4].to_i}]}
+				return self.where(querystr)
+			end
+			def in?(loc_str)
+				dat = loc_str.split(/(:|\.\.)/)
+				querystr = {:'location.chromosome' => dat[0], :'location.start'.gte => dat[2].to_i, :'location.stop'.lte => dat[4].to_i}
+				return self.where(querystr)
+			end
+			#def converse
+			#	self.where({}).each do |e|
+			#		if e[:location][:coordinates]
+			#			oldlocation = e[:location]
+			#			if oldlocation[:coordinates][0].is_a?(Array)
+			#				e[:location] = {chromosome: oldlocation[:coordinates][0][0], start: oldlocation[:coordinates][0][1], stop: oldlocation[:coordinates][1][1]}
+			#			else
+			#				e[:location] = {chromosome: oldlocation[:coordinates][0], start: oldlocation[:coordinates][1], stop: oldlocation[:coordinates][1]}
+			#			end
+			#			p e.save!
+			#		end
+			#	end
+			#end
+		end
+		# get the start position of gene rely on the genome
+		#
+		# @return [Integer] start position of gene
+		def start
+			self[:location]['start']
+		end
+		# get the end position of gene rely on the genome
+		#
+		# @return [Integer] end position of gene
+		def stop
+			self[:location]['stop']
+		end
+		alias_method :end, :stop
+		# get the chromosome
+		#
+		# @return [Integer] chromosome
+		def chromosome
+			self[:location]['chromosome']
+		end
+		# Assign gene location in format of chromosome_number:start..stop
+		#
+		# @param [String] gene location in format of chromosome_number:start..stop
+		def parse_location(loc_str)
+			dat = loc_str.split(/(:|\.\.)/)
+			if dat[4]
+				self[:location] = {chromosome: dat[0], start: dat[2].to_i, stop: dat[4].to_i}
+			else
+				self[:location] = {chromosome: dat[0], start: dat[2].to_i, stop: dat[2].to_i}
+			end
+		end
+		def location_str
+			return "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
+		end
+	end
+end

data/lib/exodb/datamodel/reference.rb ADDED Viewed

@@ -0,0 +1,387 @@
+#
+# Exodb
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+module Exodb
+	# Base Mongoid document for reference records. Mixes in Mongoid versioning and
+	# timestamps plus Exodb::XrefsField; Variantref and Generef inherit from it.
+	class Reference
+		include Mongoid::Document
+		include Mongoid::Versioning
+		include Mongoid::Timestamps
+		include Exodb::XrefsField
+		field :oid,					type: String
+	end
+	# Reference record for a variant: a location plus reference and alternate strings.
+	class Variantref < Reference
+		# Pattern with the named groups gene, position and to.
+		# NOTE(review): presumably parses variant labels such as GENE-123X -- confirm against callers.
+		PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
+		# NOTE(review): presumably the marker used for a silent change in the "to" group -- confirm.
+		SILENTSIGN = '='
+		include Exodb::LocationField
+		field :reference,			type: String
+		field :alternate,			type: String
+	end
+	# Incidence record embedded in a Generef. Generef#dl_incidence! creates one per
+	# cancer study and position.
+	class Incidence
+		include Mongoid::Document
+		field :cancertype,			type: String
+		field :position,			type: String # first run of digits of the mutation label (see Generef#dl_incidence!)
+		field :occur,				type: Array
+		field :casenumber,			type: Integer
+		embedded_in :generef
+	end
+	class Generef < Reference
+		include Exodb::LocationField
+		field :sequence,			type: String
+		field :chrrefseq,			type: String # refseq id of chromomose
+		field :strand,				type: String
+		field :psuedo,				type: Boolean
+		field :genomeref,			type: String
+		index({sequence: 'text'}, background: true)
+		has_many :genes
+		embeds_many :splices
+		embeds_many :incidences
+		validates_format_of :chrrefseq, with: /\A(urn:miriam:refseq)/
+		# Download sequence from web service please use by caution. NCBI will block scamming sequest
+		#
+		def dl_seq!
+			case self.chrrefseq
+			when /\Aurn:miriam:refseq:/
+				self.sequence = Bio::FastaFormat.new(Bio::NCBI::REST.efetch(self.chrrefseq.split(':', 4), {"db"=>"nucleotide", "rettype"=>"fasta", "retmode"=>"text", "seq_start"=>self.start, "seq_stop"=>self.end})).seq
+			end
+			self.save!
+		end
+		# Download gene symbol from HGNC service
+		#
+		def dl_symbol!
+			baseuri = "http://rest.genenames.org/search"
+			query = ""
+			if self.get_xref('urn:miriam:refseq')
+				query = "#{baseuri}/refseq_accession/#{self.chrrefseq.id.split('.')[0]}"
+			elsif self.get_xref('urn:miriam:ncbigene')
+				query = ""
+			end
+			if !query.empty?
+				response = JSON.parse(open(query, 'Accept' => 'application/json').read)['response']
+				if !response['docs'].empty?
+					response['docs'].each do |e|
+						self.add_to_set(:xrefs, "urn:miriam:hgnc:#{e["hgnc_id"]}")
+						self.add_to_set(:xrefs, "urn:miriam:hgnc.symbol:#{e["symbol"]}")
+					end
+					self.save!
+				end
+			end
+		end
+		# Download incident data from TCGA
+		#
+		def dl_incidence!
+			if self.get_xref('urn:miriam:hgnc.symbol')
+				cancerstudies = []
+				open("http://www.cbioportal.org/public-portal/webservice.do?cmd=getCancerStudies") {|f|
+					f.each_line {|line| cancerstudies.push(line.chomp.split("\t")[0])}
+				}
+				incidents = {}
+				totalcase = {}
+				cancerstudies.each do |study|
+					totalcase[study] = 0 if !totalcase.has_key?(study)
+					open("http://www.cbioportal.org/public-portal/webservice.do?cmd=getCaseLists&cancer_study_id=#{study}") do |f|
+						f.each_line do |line|
+							totalcase[study] += line.chomp.split(/\t/)[4].split(' ').length if line =~ /\tSequenced Tumors\t/
+						end
+					end
+					incidents[study] = {} if !incidents.has_key?(study)
+					open("http://www.cbioportal.org/public-portal/webservice.do?cmd=getMutationData&genetic_profile_id=#{study}_mutations&gene_list=#{self.get_xref('urn:miriam:hgnc.symbol').id}") do |f|
+						f.each_line do |line|
+							dat = line.chomp.split(/\t/)
+							if dat[5] == 'Missense_Mutation'
+								incidents[study][dat[7].split(/(\d+)/)[1]] = [] if !incidents[study].has_key?(dat[7].split(/(\d+)/)[1])
+								incidents[study][dat[7].split(/(\d+)/)[1]].push(dat[2])
+							end
+						end
+					end
+				end
+				self.incidences.clear if self.incidences
+				incidents.each_pair do |cancertype, v|
+					v.each_pair do |position, occur|
+						self.incidences << Incidence.new({cancertype: cancertype, position: position, occur: occur.uniq.sort, casenumber: totalcase[cancertype]})
+					end
+				end
+				self.save!
+			end
+		end
+		# return sequence as Bio::Sequence object
+		#
+		# @return [Bio::Sequence] the contents reversed lexically
+		def to_seq
+			return self.sequence ? Bio::Sequence.auto(self.sequence) : Bio::Sequence.auto("")
+		end
+		# return longest splice of this gene
+		def longest_splice()
+			length = 0
+			longest = nil
+			self.splices.each do |e|
+				if e.prot_len > length
+					length = e.prot_len
+					longest = e
+				end
+			end
+			return longest
+		end
+		# Check that this gene has any splice variant
+		#
+		# @return [Boolean] true if has any splices
+		def has_splices?
+			return self.splices.exists?
+		end
+		# Check if Generef has sequence
+		#
+		# @return [Boolean] Return true if there is a sequence
+		def has_sequence?()
+			return self[:sequence] ? true : false
+		end
+		# Check if Generef can translate
+		#
+		# @return [Boolean] Return true if this can be translate
+		def can_translated?()
+			return self.has_sequence? && self.has_splices? && self.longest_splice != nil ? true : false
+		end
+		# Get gene symbol
+		#
+		# @return [String] Return gene symbol or any id from xrefs or 'nosymbol'
+		def symbol
+			if self.get_xref('urn:miriam:hgnc.symbol')
+				return self.get_xref('urn:miriam:hgnc.symbol').id
+			elsif self.xrefs && !self.xrefs.empty?
+				return self.xrefs.sort[0].id
+			else
+				return 'nosymbol'
+			end
+		end
+	end
+	class Splice
+		include Mongoid::Document
+		include Exodb::XrefsField
+		field :exon,				type: Array
+		field :cds,					type: Array
+		embedded_in :generef
+		# join exon or cds position into a string
+		#
+		# @param [Array] input array exon or cds
+		# @param [Interger] Position to stop positive value for forward read negative value for complement
+		#
+		# @return [String] a string in start..end,start..end,...
+		def get_join_str(arr, position = 0)
+			reducer = self.generef.start - 1
+			tmparr = []
+			found = false
+			if position > 0
+				add = true
+				arr.each do |e|
+					if e[0] <= position && position <= e[1]
+						tmparr.push([e[0], position])
+						add = false
+						found = true
+					else
+						tmparr.push(e) if add
+					end
+				end
+			elsif position < 0
+				position = position.abs
+				add = false
+				arr.each do |e|
+					if e[0] <= position && position <= e[1]
+						tmparr.push([position, e[1]])
+						add = true
+						found = true
+					else
+						tmparr.push(e) if add
+					end
+				end
+			else
+				tmparr = arr
+			end
+			tmparr = [] if !found && position != 0
+			str = []
+			tmparr.each do |e|
+				str.push("#{e[0] - reducer}..#{e[1] - reducer}")
+			end
+			return str.join(',')
+		end
+		def get_exon_join(position = 0)
+			get_join_str(self[:exon], position)
+		end
+		def get_cds_join(position = 0)
+			get_join_str(self[:cds], position)
+		end
+		# Get spliced DNA sequence
+		#
+		# @return [Bio::Sequence] an DNA sequence
+		def get_dna_seq
+			parent =  self.generef
+			return parent.strand == '+' ? parent.to_seq.splicing("join(#{self.get_exon_join})") : parent.to_seq.splicing("complement(join(#{self.get_exon_join}))")
+		end
+		# Get spliced RNA sequence
+		#
+		# @return [Bio::Sequence] an RNA sequence
+		def get_mrna_seq
+			parent =  self.generef
+			return parent.strand == '+' ? parent.to_seq.splicing("join(#{self.get_exon_join})").rna : parent.to_seq.splicing("complement(join(#{self.get_exon_join}))").rna
+		end
+		# Get spliced coding region sequence
+		#
+		# @param [Integer] end position to get sequence
+		#
+		# @return [Bio::Sequence] an coding region sequence
+		def get_cds_seq(position = 0)
+			parent =  self.generef
+			if parent.strand == '+'
+				join = self.get_cds_join(position)
+				return !join.empty? ? parent.to_seq.splicing("join(#{join})") : ""
+			else
+				join = self.get_cds_join(-position)
+				return !join.empty? ? parent.to_seq.splicing("join(#{join})") : ""
+			end
+		end
+		# Get spliced protein sequence
+		#
+		# @return [Bio::Sequence] an protein sequence
+		def get_prot_seq
+			parent =  self.generef
+			return parent.strand == '+' ? parent.to_seq.splicing("join(#{self.get_cds_join})").translate : parent.to_seq.splicing("complement(join(#{self.get_cds_join}))").translate
+		end
+		# get length of spliced RNA
+		#
+		# @return [Integer] length of spliced RNA
+		def rna_len
+			return self.get_mrna_seq.length
+		end
+		# get length of protein product
+		#
+		# @return [Integer] length of protein product
+		def prot_len
+			return self.get_prot_seq.length
+		end
+		# Get the codon sequence at the giving position base on position of amino acid
+		#
+		# @param [Integer] codon position
+		# @return [Bio::Sequence] the codon at given position
+		def get_codon(codon_pos)
+			return self.get_cds_seq().subseq(((codon_pos - 1) * 3) + 1 , ((codon_pos - 1) * 3) + 3)
+		end
+		# convert genomic position to codon position
+		#
+		# @param [Integer] genomic position
+		# @return [Array] Return all information of codon at given position
+		def get_prot_pos(pos)
+			seqlen = self.get_cds_seq(pos).length
+			if seqlen != 0
+				return [(seqlen - 1) / 3, (seqlen - 1) % 3]
+			else
+				return []
+			end
+		end
+	end
+end

data/lib/exodb/datamodel/region.rb ADDED Viewed

@@ -0,0 +1,51 @@
+#
+# Exodb
+# Copyright (C) 2014
+#
+# author: Natapol Pornputtapong <natapol.por@gmail.com>
+#
+# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
+#
+# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
+module Exodb
+	# Base Mongoid document with timestamps; Gene and Aacid inherit from it.
+	class Region
+		include Mongoid::Document
+		include Mongoid::Timestamps
+	end
+	# A versioned gene record with a location. It embeds amino acid records (aacids)
+	# and belongs to a generef and a cell (the cell model is not visible in this file).
+	class Gene < Region
+		include Mongoid::Versioning
+		include Exodb::LocationField
+		field :symbol,				type: String
+		field :loh,					type: Boolean
+		embeds_many :aacids
+		belongs_to :generef
+		belongs_to :cell
+		# compound index over the gene symbol and the position of the embedded aacids
+		index({'symbol' => 1, 'aacids.position' => 1}, background: true)
+	end
+	# Amino acid record embedded in a Gene: a position with reference codon and
+	# amino acid plus alternative codon data.
+	# NOTE(review): this document is embedded yet declares has_many :variants -- confirm Mongoid
+	# accepts a referenced relation from an embedded document here.
+	class Aacid < Region
+		field :position,			type: Integer # position referenced to the first codon from the longest splice variant
+		field :refcodon,			type: String
+		field :refaa,				type: String
+		field :altcodon,			type: Hash
+		field :inhcodon,			type: Hash
+		field :isoform,				type: Array
+		embedded_in :gene
+		has_many :variants
+	end
+end