snp-search 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -2
- data/Gemfile.lock +2 -3
- data/README +0 -105
- data/README.rdoc +35 -29
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bin/snp-search +174 -261
- data/lib/create_methods.rb +196 -0
- data/lib/filter_ignore_snps_methods.rb +130 -0
- data/lib/information_methods.rb +117 -0
- data/lib/output_information_methods.rb +131 -0
- data/lib/snp-search.rb +18 -280
- data/lib/snp_db_connection.rb +1 -2
- data/lib/snp_db_models.rb +3 -3
- data/lib/snp_db_schema.rb +119 -80
- data/pkg/snp-search-1.1.0.gem +0 -0
- data/pkg/snp-search-1.2.0.gem +0 -0
- data/pkg/snp-search-2.3.0.gem +0 -0
- data/snp-search.gemspec +15 -12
- metadata +73 -33
- data/.rspec +0 -1
    
        data/lib/snp-search.rb
    CHANGED
    
    | @@ -1,301 +1,39 @@ | |
| 1 1 | 
             
            require 'rubygems'
         | 
| 2 | 
            -
            gem "bio", "~> 1.4.2"
         | 
| 3 2 | 
             
            require 'bio'
         | 
| 4 3 | 
             
            require  'snp_db_models'
         | 
| 5 4 | 
             
            require 'activerecord-import'
         | 
| 6 5 | 
             
            require 'diff/lcs'
         | 
| 6 | 
            +
            require 'create_methods'
         | 
| 7 | 
            +
            require 'filter_ignore_snps_methods'
         | 
| 8 | 
            +
            require 'output_information_methods'
         | 
| 7 9 |  | 
| 8 | 
            -
             | 
| 9 | 
            -
            def guess_sequence_format(reference_genome)
         | 
| 10 | 
            -
              file_extension = File.extname(reference_genome).downcase
         | 
| 11 | 
            -
              file_format = nil
         | 
| 12 | 
            -
              case file_extension
         | 
| 13 | 
            -
              when ".gbk", ".genbank", ".gb"
         | 
| 14 | 
            -
                file_format = :genbank
         | 
| 15 | 
            -
              when ".embl", ".emb"
         | 
| 16 | 
            -
                file_format = :embl
         | 
| 17 | 
            -
              end
         | 
| 18 | 
            -
              return file_format
         | 
| 19 | 
            -
            end
         | 
| 20 | 
            -
             | 
| 21 | 
            -
            # A method to populate the database with the features (genes etc) and the annotations from the gbk/embl file.  
         | 
| 22 | 
            -
            # We include all features that are not 'source' or 'gene' as they are repetitive info.  'CDS' is the gene.
         | 
| 23 | 
            -
            # The annotation table includes also the start and end coordinates of the CDS.  The strand is also included.  the 'locations' method is defined in bioruby under genbank.  It must be required at the top (bio).
         | 
| 24 | 
            -
            #Also, the qualifier and value are extracted from the gbk/embl file and added to the database.
         | 
| 25 | 
            -
            def populate_features_and_annotations(sequence_file)
         | 
| 26 | 
            -
            	puts "Adding features and their annotations...."
         | 
| 27 | 
            -
            	 ActiveRecord::Base.transaction do
         | 
| 28 | 
            -
            	 	counter = 0
         | 
| 29 | 
            -
            		sequence_file.features.each do |feature|
         | 
| 30 | 
            -
            			counter += 1
         | 
| 31 | 
            -
            			puts "Total number of features and annotations added: #{counter}" if counter % 100 == 0
         | 
| 32 | 
            -
            			unless feature.feature == "source" || feature.feature == "gene"
         | 
| 33 | 
            -
            				db_feature = Feature.new
         | 
| 34 | 
            -
            				db_feature.start = feature.locations.first.from
         | 
| 35 | 
            -
            				db_feature.end = feature.locations.first.to
         | 
| 36 | 
            -
            				db_feature.strand = feature.locations.first.strand
         | 
| 37 | 
            -
            				db_feature.name = feature.feature
         | 
| 38 | 
            -
            				db_feature.save
         | 
| 39 | 
            -
            				# Populate the Annotation table with qualifier information from the genbank file
         | 
| 40 | 
            -
            			feature.qualifiers.each do |qualifier|
         | 
| 41 | 
            -
            				a = Annotation.new
         | 
| 42 | 
            -
            				a.qualifier = qualifier.qualifier
         | 
| 43 | 
            -
            				a.value = qualifier.value
         | 
| 44 | 
            -
            				a.save
         | 
| 45 | 
            -
            				db_feature.annotations << a
         | 
| 46 | 
            -
            			end
         | 
| 47 | 
            -
            			end
         | 
| 48 | 
            -
            		end
         | 
| 49 | 
            -
            	end
         | 
| 50 | 
            -
            end
         | 
| 51 | 
            -
             | 
| 52 | 
            -
            #This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
         | 
| 53 | 
            -
            def populate_snps_alleles_genotypes(vcf_file, cuttoff_snp, cuttoff_genotype)
         | 
| 54 | 
            -
             | 
| 55 | 
            -
            puts "Adding SNPs........"
         | 
| 56 | 
            -
            # open vcf file and parse each line
         | 
| 57 | 
            -
            	File.open(vcf_file) do |f|
         | 
| 58 | 
            -
            	  # header names
         | 
| 59 | 
            -
            		while line = f.gets
         | 
| 60 | 
            -
            	  		if  line =~ /CHROM/
         | 
| 61 | 
            -
            	  			line.chomp!
         | 
| 62 | 
            -
            				column_headings = line.split("\t")
         | 
| 63 | 
            -
            				strain_names = column_headings[9..-1]
         | 
| 64 | 
            -
            				strain_names.map!{|name| name.sub(/\..*/, '')}
         | 
| 65 | 
            -
            					
         | 
| 66 | 
            -
            				strain_names.each do |str|
         | 
| 67 | 
            -
            					ss = Strain.new
         | 
| 68 | 
            -
            					ss.name = str
         | 
| 69 | 
            -
            					ss.save
         | 
| 70 | 
            -
            				end
         | 
| 71 | 
            -
            				
         | 
| 72 | 
            -
            				strains = Array.new
         | 
| 73 | 
            -
            			 	strain_names.each do |strain_name|
         | 
| 74 | 
            -
            			   		strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
         | 
| 75 | 
            -
            			   		strains << strain
         | 
| 76 | 
            -
            				 end
         | 
| 77 | 
            -
             | 
| 78 | 
            -
            		 		good_snps = 0
         | 
| 79 | 
            -
            		  		# start parsing snps
         | 
| 80 | 
            -
            				while line = f.gets
         | 
| 81 | 
            -
            					#puts line
         | 
| 82 | 
            -
            				    details = line.split("\t")
         | 
| 83 | 
            -
            				    ref = details[0]
         | 
| 84 | 
            -
            				    ref_pos = details[1]
         | 
| 85 | 
            -
            				    ref_base = details[3]
         | 
| 86 | 
            -
            				    snp_base = details[4]
         | 
| 87 | 
            -
            				    snp_qual = details [5]
         | 
| 88 | 
            -
            				    format = details[8].split(":")
         | 
| 89 | 
            -
            				    gt = format.index("GT")
         | 
| 90 | 
            -
            				    gq = format.index("GQ")
         | 
| 91 | 
            -
            				    # dp = format.index("DP")
         | 
| 92 | 
            -
            				    samples = details[9..-1]
         | 
| 93 | 
            -
             | 
| 94 | 
            -
            			     	next if ref_base.size != 1 || snp_base.size != 1 # exclude indels (e.g. G,A in REF)
         | 
| 95 | 
            -
            				    genotypes = samples.map do |s| 
         | 
| 96 | 
            -
            				      format_values = s.chomp.split(":") # output (e.g.): 0/0 \n 0,255,209 \n 99
         | 
| 97 | 
            -
            				      format_values[gt] # e.g. 0/0  
         | 
| 98 | 
            -
            				    end
         | 
| 10 | 
            +
            def find_unqiue_snps(strain_names, out, cuttoff_genotype, cuttoff_snp)
         | 
| 99 11 |  | 
| 100 | 
            -
             | 
| 101 | 
            -
            				      format_values = s.chomp.split(":")
         | 
| 102 | 
            -
            				      format_values[gq] # e.g. 99
         | 
| 103 | 
            -
            				    end
         | 
| 12 | 
            +
              *strain_names = strain_names
         | 
| 104 13 |  | 
| 105 | 
            -
             | 
| 14 | 
            +
              where_statement = strain_names.collect{|strain_name| "strains.name = '#{strain_name}' OR "}.join("").sub(/ OR $/, "")
         | 
| 106 15 |  | 
| 107 | 
            -
             | 
| 108 | 
            -
            				    variant_genotypes = Array.new
         | 
| 109 | 
            -
            				    genotypes.each_with_index do |gt, index| # indexes each 'genotypes'.
         | 
| 110 | 
            -
            				    	if gt == "1/1"
         | 
| 111 | 
            -
            					        variant_genotypes << index # variant_genotypes is the position of genome positions that have a correct SNP with 1/1.  if you want the total number of strains thats have 1/1 for that row (genome position) then puts variant_genotypes.size
         | 
| 112 | 
            -
            					        if genotypes_qualities[index].to_i >= cuttoff_genotype.to_i
         | 
| 113 | 
            -
            					           high_quality_variant_genotypes << index        
         | 
| 114 | 
            -
            					    	end
         | 
| 115 | 
            -
            						end
         | 
| 116 | 
            -
            					end
         | 
| 16 | 
            +
              outfile = File.open(out, "w")
         | 
| 117 17 |  | 
| 118 | 
            -
             | 
| 119 | 
            -
            						if gq.to_i >= cuttoff_genotype.to_i
         | 
| 120 | 
            -
            							geno_quality_array << gq
         | 
| 121 | 
            -
            						end
         | 
| 122 | 
            -
            					end
         | 
| 18 | 
            +
               snps = Snp.find_by_sql("SELECT snps.* from snps INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id WHERE (#{where_statement}) AND alleles.id <> snps.reference_allele_id AND genotypes.geno_qual >= #{cuttoff_genotype} AND snps.qual >= #{cuttoff_snp} AND (SELECT COUNT(*) from snps AS s INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id WHERE alleles.id <> snps.reference_allele_id and s.id = snps.id) = #{strain_names.size} GROUP BY snps.id HAVING COUNT(*) = #{strain_names.size}")
         | 
| 123 19 |  | 
| 124 | 
            -
             | 
| 125 | 
            -
            					# puts "yay" if geno_quality_array.keep_if {|z| z <= cuttoff_genotype.to_i}
         | 
| 20 | 
            +
               puts "The number of unique snps are #{snps.size}"
         | 
| 126 21 |  | 
| 127 | 
            -
             | 
| 128 | 
            -
            					next if samples.include?("./.")
         | 
| 129 | 
            -
            					 next if geno_quality_array.size != strains.size
         | 
| 130 | 
            -
            						if snp_qual.to_i >= cuttoff_snp.to_i && genotypes.include?("1/1") &&  ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size
         | 
| 131 | 
            -
            					   # first condition checks the overall quality of the SNP 	is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant 	type with quality >= 30 and informative SNP
         | 
| 132 | 
            -
            						
         | 
| 133 | 
            -
            				    	 if  genotypes.include?("0/0") && !genotypes.include?("0/1") # exclude SNPs which are all 1/1 i.e something strange about ref and those which have confusing heterozygote 0/1s
         | 
| 134 | 
            -
            				    	 
         | 
| 135 | 
            -
            					        good_snps +=1
         | 
| 136 | 
            -
            					        # puts good_snps
         | 
| 137 | 
            -
            					        #create snp
         | 
| 138 | 
            -
            					        s = Snp.new
         | 
| 139 | 
            -
            					        s.ref_pos = ref_pos
         | 
| 140 | 
            -
            					        s.qual = snp_qual
         | 
| 141 | 
            -
            					        s.save
         | 
| 142 | 
            -
            					        					 
         | 
| 143 | 
            -
            					   #  create ref allele
         | 
| 144 | 
            -
            					        ref_allele = Allele.new
         | 
| 145 | 
            -
            					        ref_allele.base = ref_base
         | 
| 146 | 
            -
            					        ref_allele.snp = s
         | 
| 147 | 
            -
            					        ref_allele.save
         | 
| 148 | 
            -
             | 
| 149 | 
            -
            					        s.reference_allele = ref_allele
         | 
| 150 | 
            -
            					        s.save
         | 
| 151 | 
            -
             | 
| 152 | 
            -
            					        # create snp allele
         | 
| 153 | 
            -
            					        snp_allele = Allele.new
         | 
| 154 | 
            -
            					        snp_allele.base = snp_base
         | 
| 155 | 
            -
            					        snp_allele.snp = s
         | 
| 156 | 
            -
            					        snp_allele.save
         | 
| 157 | 
            -
            					        
         | 
| 158 | 
            -
            					     	ActiveRecord::Base.transaction do
         | 
| 159 | 
            -
            						    	genos = []
         | 
| 160 | 
            -
            							    genotypes.each_with_index do |gt, index|
         | 
| 161 | 
            -
            							         genotype = Genotype.new
         | 
| 162 | 
            -
            							         genotype.strain = strains[index]
         | 
| 163 | 
            -
            							         genotype.geno_qual = genotypes_qualities[index].to_i
         | 
| 164 | 
            -
            							    	 puts index if strains[index].nil?
         | 
| 165 | 
            -
            							          if gt == "0/0" # wild type
         | 
| 166 | 
            -
            							             genotype.allele = ref_allele
         | 
| 167 | 
            -
            							          elsif gt == "1/1" # snp type
         | 
| 168 | 
            -
            							             genotype.allele = snp_allele
         | 
| 169 | 
            -
            							           else
         | 
| 170 | 
            -
            							             puts "Strange SNP #{gt}"
         | 
| 171 | 
            -
            							          end
         | 
| 172 | 
            -
            							          genos << genotype
         | 
| 173 | 
            -
            								end
         | 
| 174 | 
            -
            								# Using activerecord-import to speed up importing
         | 
| 175 | 
            -
            						        Genotype.import genos, :validate => false 
         | 
| 176 | 
            -
            								 puts "Total SNPs added so far: #{good_snps}" if good_snps % 100 == 0
         | 
| 177 | 
            -
            							end
         | 
| 178 | 
            -
            				      	end
         | 
| 179 | 
            -
            				     
         | 
| 180 | 
            -
            			    	end
         | 
| 181 | 
            -
            			    end
         | 
| 182 | 
            -
            			end
         | 
| 183 | 
            -
            		end
         | 
| 184 | 
            -
            	end
         | 
| 185 | 
            -
            	#Here we link the features to snps.
         | 
| 186 | 
            -
            	Snp.all.each do |snp|
         | 
| 187 | 
            -
            		x = Feature.where("features.start <= ? AND features.end >= ?", snp.ref_pos, snp.ref_pos).first
         | 
| 188 | 
            -
            		snp.feature = x
         | 
| 189 | 
            -
            		snp.save
         | 
| 190 | 
            -
            	end
         | 
| 22 | 
            +
               output_information_methods(snps, outfile, cuttoff_genotype, cuttoff_snp, false)
         | 
| 191 23 | 
             
            end
         | 
| 192 24 |  | 
| 193 | 
            -
            def find_shared_snps(strain_names)
         | 
| 194 | 
            -
                *strain_names = strain_names
         | 
| 195 | 
            -
                 
         | 
| 196 | 
            -
               where_statement = strain_names.collect{|strain_name| "strains.name = '#{strain_name}' OR "}.join("").sub(/ OR $/, "")
         | 
| 197 | 
            -
             | 
| 198 | 
            -
               Snp.find_by_sql("SELECT * from snps INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id WHERE (#{where_statement}) AND alleles.id <> snps.reference_allele_id AND (SELECT COUNT(*) from snps AS s INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id WHERE alleles.id <> snps.reference_allele_id and s.id = snps.id) = #{strain_names.size} GROUP BY snps.id HAVING COUNT(*) = #{strain_names.size}")
         | 
| 199 | 
            -
            end
         | 
| 200 | 
            -
             | 
| 201 | 
            -
            def synonymous(sequence_file)
         | 
| 202 | 
            -
             | 
| 203 | 
            -
            	#Reference Sequence
         | 
| 204 | 
            -
            	genome_sequence = Bio::FlatFile.open(Bio::GenBank, sequence_file).next_entry
         | 
| 205 | 
            -
             | 
| 206 | 
            -
            	#Extract all nucleotide sequence from ORIGIN
         | 
| 207 | 
            -
            	all_seqs_original = genome_sequence.seq
         | 
| 208 | 
            -
            	ref_bases =[]
         | 
| 209 25 |  | 
| 210 | 
            -
             | 
| 26 | 
            +
            def information(out, cuttoff_genotype, cuttoff_snp)
         | 
| 211 27 |  | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
                  # create an empty array
         | 
| 216 | 
            -
                  strains.each do |strain|
         | 
| 217 | 
            -
                    strains_hash[strain.id] = Array.new
         | 
| 218 | 
            -
                  end
         | 
| 28 | 
            +
              puts "outputting SNP info....."
         | 
| 29 | 
            +
              
         | 
| 30 | 
            +
              strains = Strain.all
         | 
| 219 31 |  | 
| 220 | 
            -
             | 
| 32 | 
            +
              snps = Snp.find_by_sql("SELECT distinct snps.* from snps INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id where alleles.id <> snps.reference_allele_id")
         | 
| 221 33 |  | 
| 222 | 
            -
             | 
| 34 | 
            +
              outfile = File.open(out, "w")
         | 
| 223 35 |  | 
| 36 | 
            +
              output_information_methods(snps, outfile, cuttoff_genotype, cuttoff_snp, true)
         | 
| 224 37 |  | 
| 225 | 
            -
             | 
| 226 | 
            -
            		variant.snps.each do |snp|
         | 
| 227 | 
            -
            			snp.alleles.each do |allele|
         | 
| 228 | 
            -
            				if allele.id != snp.reference_allele_id
         | 
| 229 | 
            -
            					all_seqs_mutated = genome_sequence.seq
         | 
| 230 | 
            -
            					mutated_seq_translated = []
         | 
| 231 | 
            -
            					original_seq_translated = []
         | 
| 232 | 
            -
            					all_seqs_mutated[snp.ref_pos.to_i-1] = allele.base
         | 
| 233 | 
            -
             | 
| 234 | 
            -
            					mutated_seq = Bio::Sequence.auto(all_seqs_mutated[variant.start-1..variant.end-1])
         | 
| 235 | 
            -
            					original_seq =  Bio::Sequence.auto(all_seqs_original[variant.start-1..variant.end-1])
         | 
| 236 | 
            -
             | 
| 237 | 
            -
            					if variant.strand == -1
         | 
| 238 | 
            -
            						mutated_seq_translated << mutated_seq.reverse_complement.translate
         | 
| 239 | 
            -
            						original_seq_translated << original_seq.reverse_complement.translate
         | 
| 240 | 
            -
            					    
         | 
| 241 | 
            -
            					else
         | 
| 242 | 
            -
            						mutated_seq_translated << mutated_seq.translate
         | 
| 243 | 
            -
            						original_seq_translated << original_seq.translate
         | 
| 244 | 
            -
            						
         | 
| 245 | 
            -
            				 	end
         | 
| 246 | 
            -
             | 
| 247 | 
            -
            					 	mutated_seq_translated.zip(original_seq_translated).each do |mut, org|
         | 
| 248 | 
            -
            					 		mutated_seq_translated_clean = mut.gsub(/\*$/,"")
         | 
| 249 | 
            -
            					 		original_seq_translated_clean = org.gsub(/\*$/,"")
         | 
| 250 | 
            -
             | 
| 251 | 
            -
            							hydrophobic = ["I", "L", "V", "C", "A", "G", "M", "F", "Y", "W", "H", "T"]
         | 
| 252 | 
            -
            							non_hydrophobic = ["K", "E", "Q", "D", "N", "S", "P", "B"]
         | 
| 253 | 
            -
             | 
| 254 | 
            -
            							polar = ["Y", "W", "H", "K", "R", "E", "Q", "D", "N", "S", "P", "B"]
         | 
| 255 | 
            -
            							non_polar = ["I", "L", "V", "C", "A", "G", "M", "F", "T"]
         | 
| 256 | 
            -
             | 
| 257 | 
            -
            							small = ["V","C","A","G","D","N","S","T","P"]
         | 
| 258 | 
            -
            							non_small = ["I","L","M","F","Y","W","H","K","R","E","Q"]
         | 
| 259 | 
            -
             | 
| 260 | 
            -
            							if original_seq_translated_clean == mutated_seq_translated_clean
         | 
| 261 | 
            -
            							# if original_seq_translated == mutated_seq_translated
         | 
| 262 | 
            -
            								if mutated_seq_translated_clean =~ /\*/
         | 
| 263 | 
            -
            									puts "#{variant.start}\t#{variant.end}\t#{snp.ref_pos}\t#{all_seqs_original[snp.ref_pos.to_i-1].upcase}\t#{(allele.base).upcase}\tsynonymous\t\t\tYes"
         | 
| 264 | 
            -
            								else
         | 
| 265 | 
            -
            									puts "#{variant.start}\t#{variant.end}\t#{snp.ref_pos}\t#{all_seqs_original[snp.ref_pos.to_i-1].upcase}\t#{(allele.base).upcase}\tsynonymous"
         | 
| 266 | 
            -
            								end
         | 
| 267 | 
            -
            							else
         | 
| 268 | 
            -
            								
         | 
| 269 | 
            -
            								diffs = Diff::LCS.diff(original_seq_translated_clean, mutated_seq_translated_clean)
         | 
| 270 | 
            -
            								
         | 
| 271 | 
            -
            								if mutated_seq_translated_clean =~ /\*/
         | 
| 272 | 
            -
            									puts "#{variant.start}\t#{variant.end}\t#{snp.ref_pos}\t#{all_seqs_original[snp.ref_pos.to_i-1].upcase}\t#{(allele.base).upcase}\tnon-synonymous\t#{diffs[0][0].element}\t#{diffs[0][1].element}\tYes\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}"
         | 
| 273 | 
            -
            								else
         | 
| 274 | 
            -
            									puts "#{variant.start}\t#{variant.end}\t#{snp.ref_pos}\t#{all_seqs_original[snp.ref_pos.to_i-1].upcase}\t#{(allele.base).upcase}\tnon-synonymous\t#{diffs[0][0].element}\t#{diffs[0][1].element}\t\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}"
         | 
| 275 | 
            -
            								end
         | 
| 276 | 
            -
            							end
         | 
| 277 | 
            -
            						end
         | 
| 278 | 
            -
            					
         | 
| 279 | 
            -
            				end
         | 
| 280 | 
            -
            			end
         | 
| 281 | 
            -
            		end
         | 
| 282 | 
            -
            	end
         | 
| 283 | 
            -
             | 
| 284 | 
            -
            	#Take all SNP positions in ref genome
         | 
| 285 | 
            -
            	# snp_positions = Feature.find_by_sql("select snps.ref_pos from features inner join snps on features.id = snps.feature_id inner join alleles on snps.id = alleles.snp_id where alleles.id <> snps.reference_allele_id and features.name = 'CDS'").map{|snp| snp.ref_pos}
         | 
| 286 | 
            -
             | 
| 287 | 
            -
            	# # Take all SNP nucleotide
         | 
| 288 | 
            -
            	# snps = Feature.find_by_sql("select alleles.base from features inner join snps on features.id = snps.feature_id inner join alleles on snps.id = alleles.snp_id where alleles.id <> snps.reference_allele_id and features.name = 'CDS'").map{|allele| allele.base}
         | 
| 289 | 
            -
             | 
| 290 | 
            -
            	# # Mutate (substitute) the original sequence with the SNPs
         | 
| 291 | 
            -
            	
         | 
| 292 | 
            -
            	# # Here all_seqs_original are all the nucelotide sequences but with the snps subsituted in them
         | 
| 293 | 
            -
             | 
| 294 | 
            -
            	# #Get start position of CDS with SNP
         | 
| 295 | 
            -
            	# coordinates_start = Feature.find_by_sql("select start from features inner join snps on features.id = snps.feature_id inner join alleles on snps.id = alleles.snp_id where features.name = 'CDS' and alleles.id <> snps.reference_allele_id").map{|feature| feature.start}
         | 
| 296 | 
            -
             | 
| 297 | 
            -
            	# #Get end position of CDS with SNP
         | 
| 298 | 
            -
            	# coordinates_end = Feature.find_by_sql("select end from features inner join snps on features.id = snps.feature_id inner join alleles on snps.id = alleles.snp_id where features.name = 'CDS' and alleles.id <> snps.reference_allele_id").map{|feature| feature.end}	
         | 
| 38 | 
            +
            end
         | 
| 299 39 |  | 
| 300 | 
            -
            	
         | 
| 301 | 
            -
            end
         | 
    
        data/lib/snp_db_connection.rb
    CHANGED
    
    
    
        data/lib/snp_db_models.rb
    CHANGED
    
    | @@ -6,13 +6,13 @@ class Strain < ActiveRecord::Base | |
| 6 6 | 
             
            end
         | 
| 7 7 |  | 
| 8 8 | 
             
            class Feature < ActiveRecord::Base
         | 
| 9 | 
            +
              has_and_belongs_to_many :snps
         | 
| 9 10 | 
             
              has_many :annotations
         | 
| 10 | 
            -
              has_many :snps
         | 
| 11 11 | 
             
            end
         | 
| 12 12 |  | 
| 13 13 | 
             
            class Snp < ActiveRecord::Base
         | 
| 14 | 
            -
               | 
| 15 | 
            -
              has_many | 
| 14 | 
            +
              has_and_belongs_to_many :features
         | 
| 15 | 
            +
              has_many :alleles
         | 
| 16 16 | 
             
              belongs_to :reference_allele, :class_name => "Allele", :foreign_key => "reference_allele_id"
         | 
| 17 17 | 
             
            end
         | 
| 18 18 |  | 
    
        data/lib/snp_db_schema.rb
    CHANGED
    
    | @@ -1,93 +1,132 @@ | |
| 1 1 | 
             
            def db_schema
         | 
| 2 | 
            -
            ActiveRecord::Schema.define do
         | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 2 | 
            +
              ActiveRecord::Schema.define do
         | 
| 3 | 
            +
                unless table_exists? :strains
         | 
| 4 | 
            +
                  create_table :strains do |t|
         | 
| 5 | 
            +
                    t.column :name, :string
         | 
| 6 | 
            +
                    t.column :description, :string
         | 
| 7 | 
            +
                  end
         | 
| 7 8 | 
             
                end
         | 
| 8 | 
            -
              end
         | 
| 9 9 |  | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
                   | 
| 15 | 
            -
                  t.column :end, :integer
         | 
| 16 | 
            -
                  t.column :strand, :integer
         | 
| 10 | 
            +
                unless table_exists? :features_snps
         | 
| 11 | 
            +
                  create_table :features_snps, :id => false do |t|
         | 
| 12 | 
            +
                    t.column :feature_id, :integer, :null => false
         | 
| 13 | 
            +
                    t.column :snp_id, :integer, :null => false
         | 
| 14 | 
            +
                  end
         | 
| 17 15 | 
             
                end
         | 
| 18 | 
            -
              end
         | 
| 19 16 |  | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 17 | 
            +
                unless table_exists? :features
         | 
| 18 | 
            +
                  create_table :features do |t|
         | 
| 19 | 
            +
                    t.column :name, :string
         | 
| 20 | 
            +
                    t.column :start, :integer
         | 
| 21 | 
            +
                    t.column :end, :integer
         | 
| 22 | 
            +
                    t.column :strand, :integer
         | 
| 23 | 
            +
                    t.column :sequence, :string
         | 
| 24 | 
            +
                  end
         | 
| 26 25 | 
             
                end
         | 
| 27 | 
            -
              end
         | 
| 28 26 |  | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 27 | 
            +
                unless table_exists? :snps
         | 
| 28 | 
            +
                  create_table :snps do |t|
         | 
| 29 | 
            +
                    t.column :ref_pos, :integer
         | 
| 30 | 
            +
                    t.column :qual, :float 
         | 
| 31 | 
            +
                    t.column :reference_allele_id, :integer
         | 
| 32 | 
            +
                  end
         | 
| 33 33 | 
             
                end
         | 
| 34 | 
            -
              end
         | 
| 35 | 
            -
             
         | 
| 36 | 
            -
              unless table_exists? :genotypes
         | 
| 37 | 
            -
                create_table :genotypes do |t|
         | 
| 38 | 
            -
                  t.column :allele_id, :integer
         | 
| 39 | 
            -
                  t.column :strain_id, :integer
         | 
| 40 | 
            -
                  t.column :geno_qual, :float
         | 
| 41 | 
            -
                end
         | 
| 42 | 
            -
              end
         | 
| 43 34 |  | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
                   | 
| 35 | 
            +
                unless table_exists? :alleles
         | 
| 36 | 
            +
                  create_table :alleles do |t|
         | 
| 37 | 
            +
                    t.column :snp_id, :integer
         | 
| 38 | 
            +
                    t.column :base, :string
         | 
| 39 | 
            +
                  end
         | 
| 49 40 | 
             
                end
         | 
| 50 | 
            -
              end
         | 
| 51 41 |  | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
                 | 
| 61 | 
            -
             | 
| 62 | 
            -
               | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
                 | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
                 | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
                 | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
                 | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
                 | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 90 | 
            -
                 | 
| 42 | 
            +
                unless table_exists? :genotypes
         | 
| 43 | 
            +
                  create_table :genotypes do |t|
         | 
| 44 | 
            +
                    t.column :allele_id, :integer
         | 
| 45 | 
            +
                    t.column :strain_id, :integer
         | 
| 46 | 
            +
                    t.column :geno_qual, :float
         | 
| 47 | 
            +
                  end
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                unless table_exists? :annotations
         | 
| 51 | 
            +
                  create_table :annotations do |t|
         | 
| 52 | 
            +
                    t.column  :qualifier, :string
         | 
| 53 | 
            +
                    t.column :value, :string
         | 
| 54 | 
            +
                    t.column :feature_id, :integer
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
                 
         | 
| 58 | 
            +
                # indices
         | 
| 59 | 
            +
                unless index_exists? :strains, :id
         | 
| 60 | 
            +
                  add_index :strains, :id
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
                unless index_exists? :strains, :name
         | 
| 63 | 
            +
                  add_index :strains, :name
         | 
| 64 | 
            +
                end
         | 
| 65 | 
            +
                unless index_exists? :features, :id
         | 
| 66 | 
            +
                  add_index :features, :id
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
                unless index_exists? :features, :name
         | 
| 69 | 
            +
                  add_index :features, :name
         | 
| 70 | 
            +
                end
         | 
| 71 | 
            +
                unless index_exists? :features, :start
         | 
| 72 | 
            +
                  add_index :features, :start
         | 
| 73 | 
            +
                end
         | 
| 74 | 
            +
                unless index_exists? :features, :end
         | 
| 75 | 
            +
                  add_index :features, :end
         | 
| 76 | 
            +
                end
         | 
| 77 | 
            +
                unless index_exists? :features, :strand
         | 
| 78 | 
            +
                  add_index :features, :strand
         | 
| 79 | 
            +
                end
         | 
| 80 | 
            +
                unless index_exists? :features, :sequence
         | 
| 81 | 
            +
                  add_index :features, :sequence
         | 
| 82 | 
            +
                end
         | 
| 83 | 
            +
                unless index_exists? :snps, :id
         | 
| 84 | 
            +
                  add_index :snps, :id
         | 
| 85 | 
            +
                end
         | 
| 86 | 
            +
                unless index_exists? :snps, :ref_pos
         | 
| 87 | 
            +
                  add_index :snps, :ref_pos
         | 
| 88 | 
            +
                end
         | 
| 89 | 
            +
                unless index_exists? :snps, :qual
         | 
| 90 | 
            +
                  add_index :snps, :qual
         | 
| 91 | 
            +
                end
         | 
| 92 | 
            +
                unless index_exists? :snps, :reference_allele_id
         | 
| 93 | 
            +
                  add_index :snps, :reference_allele_id
         | 
| 94 | 
            +
                end
         | 
| 95 | 
            +
                unless index_exists? :features_snps, :feature_id
         | 
| 96 | 
            +
                  add_index :features_snps, :feature_id
         | 
| 97 | 
            +
                end
         | 
| 98 | 
            +
                unless index_exists? :features_snps, :snp_id
         | 
| 99 | 
            +
                  add_index :features_snps, :snp_id
         | 
| 100 | 
            +
                end
         | 
| 101 | 
            +
                unless index_exists? :snps, :qual
         | 
| 102 | 
            +
                  add_index :snps, :qual
         | 
| 103 | 
            +
                end
         | 
| 104 | 
            +
                unless index_exists? :alleles, :snp_id
         | 
| 105 | 
            +
                  add_index :alleles, :snp_id
         | 
| 106 | 
            +
                end
         | 
| 107 | 
            +
                unless index_exists? :alleles, :base
         | 
| 108 | 
            +
                  add_index :alleles, :base
         | 
| 109 | 
            +
                end
         | 
| 110 | 
            +
                unless index_exists? :genotypes, :id
         | 
| 111 | 
            +
                  add_index :genotypes, :id
         | 
| 112 | 
            +
                end
         | 
| 113 | 
            +
                unless index_exists? :genotypes, :allele_id
         | 
| 114 | 
            +
                  add_index :genotypes, :allele_id
         | 
| 115 | 
            +
                end
         | 
| 116 | 
            +
                unless index_exists? :genotypes, :strain_id
         | 
| 117 | 
            +
                  add_index :genotypes, :strain_id
         | 
| 118 | 
            +
                end
         | 
| 119 | 
            +
                unless index_exists? :genotypes, :geno_qual
         | 
| 120 | 
            +
                  add_index :genotypes, :geno_qual
         | 
| 121 | 
            +
                end
         | 
| 122 | 
            +
                unless index_exists? :annotations, :feature_id
         | 
| 123 | 
            +
                    add_index :annotations, :feature_id
         | 
| 124 | 
            +
                end
         | 
| 125 | 
            +
                unless index_exists? :annotations, :qualifier
         | 
| 126 | 
            +
                  add_index :annotations, :qualifier
         | 
| 127 | 
            +
                end
         | 
| 128 | 
            +
                unless index_exists? :annotations, :value
         | 
| 129 | 
            +
                  add_index :annotations, :value
         | 
| 130 | 
            +
                end
         | 
| 91 131 | 
             
              end
         | 
| 92 | 
            -
            end
         | 
| 93 132 | 
             
            end
         |