snp-search 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +6 -1
- data/Gemfile.lock +23 -0
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/bin/snp-search +63 -0
- data/lib/{snp-search.rb → snp-search/snp-search.rb} +18 -6
- data/lib/{snp_db_connection.rb → snp-search/snp_db_connection.rb} +1 -1
- data/lib/{snp_db_models.rb → snp-search/snp_db_models.rb} +0 -0
- data/lib/{snp_db_schema.rb → snp-search/snp_db_schema.rb} +2 -3
- data/snp-search.gemspec +18 -7
- data/snp-search_test.rb +207 -0
- metadata +52 -17
- data/lib/user_entry_file.rb +0 -21
data/Gemfile
CHANGED
@@ -3,6 +3,10 @@ source "http://rubygems.org"
|
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
5
|
|
6
|
+
gem "activerecord"
|
7
|
+
gem "bio"
|
8
|
+
gem "slop"
|
9
|
+
|
6
10
|
# Add dependencies to develop your gem here.
|
7
11
|
# Include everything needed to run rake, tests, features, etc.
|
8
12
|
group :development do
|
@@ -10,4 +14,5 @@ group :development do
|
|
10
14
|
gem "bundler", "~> 1.0.0"
|
11
15
|
gem "jeweler", "~> 1.6.4"
|
12
16
|
gem "rcov", ">= 0"
|
13
|
-
|
17
|
+
|
18
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -1,12 +1,30 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
activemodel (3.1.0)
|
5
|
+
activesupport (= 3.1.0)
|
6
|
+
bcrypt-ruby (~> 3.0.0)
|
7
|
+
builder (~> 3.0.0)
|
8
|
+
i18n (~> 0.6)
|
9
|
+
activerecord (3.1.0)
|
10
|
+
activemodel (= 3.1.0)
|
11
|
+
activesupport (= 3.1.0)
|
12
|
+
arel (~> 2.2.1)
|
13
|
+
tzinfo (~> 0.3.29)
|
14
|
+
activesupport (3.1.0)
|
15
|
+
multi_json (~> 1.0)
|
16
|
+
arel (2.2.1)
|
17
|
+
bcrypt-ruby (3.0.0)
|
18
|
+
bio (1.4.2)
|
19
|
+
builder (3.0.0)
|
4
20
|
diff-lcs (1.1.3)
|
5
21
|
git (1.2.5)
|
22
|
+
i18n (0.6.0)
|
6
23
|
jeweler (1.6.4)
|
7
24
|
bundler (~> 1.0)
|
8
25
|
git (>= 1.2.5)
|
9
26
|
rake
|
27
|
+
multi_json (1.0.3)
|
10
28
|
rake (0.9.2.2)
|
11
29
|
rcov (0.9.11)
|
12
30
|
rspec (2.3.0)
|
@@ -17,12 +35,17 @@ GEM
|
|
17
35
|
rspec-expectations (2.3.0)
|
18
36
|
diff-lcs (~> 1.1.2)
|
19
37
|
rspec-mocks (2.3.0)
|
38
|
+
slop (2.4.0)
|
39
|
+
tzinfo (0.3.29)
|
20
40
|
|
21
41
|
PLATFORMS
|
22
42
|
ruby
|
23
43
|
|
24
44
|
DEPENDENCIES
|
45
|
+
activerecord
|
46
|
+
bio
|
25
47
|
bundler (~> 1.0.0)
|
26
48
|
jeweler (~> 1.6.4)
|
27
49
|
rcov
|
28
50
|
rspec (~> 2.3.0)
|
51
|
+
slop
|
data/Rakefile
CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{Use the snp-search toolset to query the SNP database}
|
22
22
|
gem.email = "ali.al-shahib@hpa.org.uk"
|
23
23
|
gem.authors = ["Ali Al-Shahib", "Anthony Underwood"]
|
24
|
+
gem.executables = ["snp-search"]
|
24
25
|
# dependencies defined in Gemfile
|
25
26
|
end
|
26
27
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -38,10 +39,9 @@ end
|
|
38
39
|
|
39
40
|
task :default => :spec
|
40
41
|
|
41
|
-
require '
|
42
|
-
|
42
|
+
require 'rdoc/task'
|
43
|
+
RDoc::Task.new do |rdoc|
|
43
44
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
44
|
-
|
45
45
|
rdoc.rdoc_dir = 'rdoc'
|
46
46
|
rdoc.title = "snp-search #{version}"
|
47
47
|
rdoc.rdoc_files.include('README*')
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/bin/snp-search
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'snp-search'
|
2
|
+
require 'snp_db_connection'
|
3
|
+
require 'snp_db_models'
|
4
|
+
require 'snp_db_schema'
|
5
|
+
require 'slop'
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
opts = Slop.new :help do
|
10
|
+
banner "ruby snp-search [OPTIONS]"
|
11
|
+
|
12
|
+
on :V, :verbose, 'Enable verbose mode'
|
13
|
+
on :n, :name=, 'Name of database', true
|
14
|
+
on :r, :reference_file=, 'Path for the reference database, in gbk or embl file format'
|
15
|
+
on :v, :vcf_file=, 'Path for the .vcf file', true
|
16
|
+
on :s, :strain, 'Path for the list of strains text file', true
|
17
|
+
on :c, :cuttoff_snp=, 'cuttoff for SNP quality'
|
18
|
+
on :t, :cuttoff_genotype=, 'cuttoff for genotype quality'
|
19
|
+
on_empty do
|
20
|
+
puts help
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
opts.parse
|
25
|
+
|
26
|
+
strains = []
|
27
|
+
File.read(opts[:strain]).each_line do |line|
|
28
|
+
strains << line.chop
|
29
|
+
end
|
30
|
+
|
31
|
+
# Enter the name of your database
|
32
|
+
establish_connection(opts[:name])
|
33
|
+
|
34
|
+
# Schema will run here
|
35
|
+
db_schema
|
36
|
+
|
37
|
+
ref = opts[:reference_file]
|
38
|
+
|
39
|
+
sequence_format = guess_sequence_format(ref)
|
40
|
+
|
41
|
+
case sequence_format
|
42
|
+
when :genbank
|
43
|
+
sequence_flatfile = Bio::FlatFile.open(Bio::GenBank,opts[:reference_file]).next_entry
|
44
|
+
when :embl
|
45
|
+
sequence_flatfile = Bio::FlatFile.open(Bio::EMBL,opts[:reference_file]).next_entry
|
46
|
+
else
|
47
|
+
puts "All sequence files should be of genbank or embl format"
|
48
|
+
exit
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# path for vcf file here
|
53
|
+
vcf_mpileup_file = opts[:vcf_file]
|
54
|
+
|
55
|
+
|
56
|
+
# The populate_strains method populates the strains in the db. It uses the strain names in array.
|
57
|
+
populate_strains(strains)
|
58
|
+
|
59
|
+
# The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
|
60
|
+
populate_features_and_annotations(sequence_flatfile)
|
61
|
+
|
62
|
+
#The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
|
63
|
+
populate_snps_alleles_genotypes(strains, vcf_mpileup_file, opts[:cuttoff_snp].to_i, opts[:cuttoff_genotype].to_i)
|
@@ -1,7 +1,19 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bio'
|
3
3
|
require 'snp_db_models'
|
4
|
-
establish_connection
|
4
|
+
#establish_connection
|
5
|
+
|
6
|
+
def guess_sequence_format(reference_genome)
|
7
|
+
file_extension = File.extname(reference_genome).downcase
|
8
|
+
file_format = nil
|
9
|
+
case file_extension
|
10
|
+
when ".gbk", ".genbank", ".gb"
|
11
|
+
file_format = :genbank
|
12
|
+
when ".embl", ".emb"
|
13
|
+
file_format = :embl
|
14
|
+
end
|
15
|
+
return file_format
|
16
|
+
end
|
5
17
|
|
6
18
|
# A method to populate the strain names in the Strain table. strain_names is an array of strain names.
|
7
19
|
def populate_strains(strain_names)
|
@@ -16,8 +28,8 @@ end
|
|
16
28
|
# We include all features that are not 'source' or 'gene' as they are repetitive info. 'CDS' is the gene.
|
17
29
|
# The annotation table includes also the start and end coordinates of the CDS. The strand is also included. the 'locations' method is defined in bioruby under genbank. It must be required at the top (bio).
|
18
30
|
# Also, the qualifier and value are extracted from the embl file and added to the database.
|
19
|
-
def populate_features_and_annotations(
|
20
|
-
|
31
|
+
def populate_features_and_annotations(sequence_file)
|
32
|
+
sequence_file.features.each do |feature|
|
21
33
|
unless feature.feature == "source" || feature.feature == "gene"
|
22
34
|
db_feature = Feature.new
|
23
35
|
db_feature.start = feature.locations.first.from
|
@@ -43,7 +55,7 @@ end
|
|
43
55
|
#This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
|
44
56
|
# It requires the strain_names as array and the output (vcf file) from mpileup-snp identification algorithm.
|
45
57
|
|
46
|
-
def populate_snps_alleles_genotypes(strain_names, vcf_file)
|
58
|
+
def populate_snps_alleles_genotypes(strain_names, vcf_file, cuttoff_snp, cuttoff_genotype)
|
47
59
|
strains = Array.new
|
48
60
|
strain_names.each do |strain_name|
|
49
61
|
strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
|
@@ -84,13 +96,13 @@ def populate_snps_alleles_genotypes(strain_names, vcf_file)
|
|
84
96
|
genotypes.each_with_index do |gt, index|
|
85
97
|
if gt == "1/1"
|
86
98
|
variant_genotypes << index
|
87
|
-
if genotypes_qualities[index].to_i >=
|
99
|
+
if genotypes_qualities[index].to_i >= cuttoff_genotype
|
88
100
|
high_quality_variant_genotypes << index
|
89
101
|
end
|
90
102
|
end
|
91
103
|
end
|
92
104
|
|
93
|
-
if snp_qual.to_i >=
|
105
|
+
if snp_qual.to_i >= cuttoff_snp && genotypes.include?("1/1") && ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size # first condition checks the overall quality of the SNP is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant type with quality >= 30 and informative SNP
|
94
106
|
if genotypes.include?("0/0") && !genotypes.include?("0/1") # exclude SNPs which are all 1/1 i.e something strange about ref and those which have confusing heterozygote 0/1s
|
95
107
|
good_snps +=1
|
96
108
|
# puts good_snps
|
File without changes
|
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
establish_connection
|
3
|
-
|
1
|
+
def db_schema
|
4
2
|
ActiveRecord::Schema.define do
|
5
3
|
unless table_exists? :strains
|
6
4
|
create_table :strains do |t|
|
@@ -83,4 +81,5 @@ ActiveRecord::Schema.define do
|
|
83
81
|
unless index_exists? :annotations, :feature_id
|
84
82
|
add_index :annotations, :feature_id
|
85
83
|
end
|
84
|
+
end
|
86
85
|
end
|
data/snp-search.gemspec
CHANGED
@@ -5,13 +5,14 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "snp-search"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ali Al-Shahib", "Anthony Underwood"]
|
12
|
-
s.date = "2011-11-
|
12
|
+
s.date = "2011-11-30"
|
13
13
|
s.description = "Use the snp-search toolset to query the SNP database"
|
14
14
|
s.email = "ali.al-shahib@hpa.org.uk"
|
15
|
+
s.executables = ["snp-search"]
|
15
16
|
s.extra_rdoc_files = [
|
16
17
|
"LICENSE.txt",
|
17
18
|
"README",
|
@@ -26,12 +27,13 @@ Gem::Specification.new do |s|
|
|
26
27
|
"README.rdoc",
|
27
28
|
"Rakefile",
|
28
29
|
"VERSION",
|
29
|
-
"
|
30
|
-
"lib/
|
31
|
-
"lib/
|
32
|
-
"lib/
|
33
|
-
"lib/
|
30
|
+
"bin/snp-search",
|
31
|
+
"lib/snp-search/snp-search.rb",
|
32
|
+
"lib/snp-search/snp_db_connection.rb",
|
33
|
+
"lib/snp-search/snp_db_models.rb",
|
34
|
+
"lib/snp-search/snp_db_schema.rb",
|
34
35
|
"snp-search.gemspec",
|
36
|
+
"snp-search_test.rb",
|
35
37
|
"spec/snp-search_spec.rb",
|
36
38
|
"spec/spec_helper.rb"
|
37
39
|
]
|
@@ -45,17 +47,26 @@ Gem::Specification.new do |s|
|
|
45
47
|
s.specification_version = 3
|
46
48
|
|
47
49
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
50
|
+
s.add_runtime_dependency(%q<activerecord>, [">= 0"])
|
51
|
+
s.add_runtime_dependency(%q<bio>, [">= 0"])
|
52
|
+
s.add_runtime_dependency(%q<slop>, [">= 0"])
|
48
53
|
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
49
54
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
50
55
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
51
56
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
52
57
|
else
|
58
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
59
|
+
s.add_dependency(%q<bio>, [">= 0"])
|
60
|
+
s.add_dependency(%q<slop>, [">= 0"])
|
53
61
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
54
62
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
55
63
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
56
64
|
s.add_dependency(%q<rcov>, [">= 0"])
|
57
65
|
end
|
58
66
|
else
|
67
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
68
|
+
s.add_dependency(%q<bio>, [">= 0"])
|
69
|
+
s.add_dependency(%q<slop>, [">= 0"])
|
59
70
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
60
71
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
61
72
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
data/snp-search_test.rb
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bio'
|
3
|
+
require 'snp_db_models'
|
4
|
+
require 'snp_db_connection'
|
5
|
+
require 'snp_db_models'
|
6
|
+
require 'snp_db_schema'
|
7
|
+
require 'highline/import'
|
8
|
+
require 'pp'
|
9
|
+
require 'slop'
|
10
|
+
|
11
|
+
|
12
|
+
opts = Slop.new :help do
|
13
|
+
banner "ruby snp-search.rb [OPTIONS]"
|
14
|
+
|
15
|
+
on :v, :version, 'Display the version of App' do
|
16
|
+
puts 'Version 1.5!'
|
17
|
+
exit
|
18
|
+
end
|
19
|
+
|
20
|
+
on :V, :verbose, 'Enable verbose mode'
|
21
|
+
on :n, :name=, 'Name of database', true
|
22
|
+
on :r, :reference=, 'Path for the reference database', true
|
23
|
+
on :f, :vcf=, 'Path for the .vcf file', true
|
24
|
+
on :s, :strain, 'Path for the list of strains text file', true
|
25
|
+
on :c, :cuttoff, 'cuttoff for SNP quality', true
|
26
|
+
on_empty do
|
27
|
+
puts help
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.parse
|
32
|
+
|
33
|
+
strains = []
|
34
|
+
File.read(opts[:strain]).each_line do |line|
|
35
|
+
strains << line.chop
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
# A method to populate the strain names in the Strain table. strain_names is an array of strain names.
|
40
|
+
def populate_strains(strain_names)
|
41
|
+
strain_names.each do |strain|
|
42
|
+
s = Strain.new
|
43
|
+
s.name = strain
|
44
|
+
s.save
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# A method to populate the database with the features (genes etc) and the annotations from the embl file.
|
49
|
+
# We include all features that are not 'source' or 'gene' as they are repetitive info. 'CDS' is the gene.
|
50
|
+
# The annotation table includes also the start and end coordinates of the CDS. The strand is also included. the 'locations' method is defined in bioruby under genbank. It must be required at the top (bio).
|
51
|
+
# Also, the qualifier and value are extracted from the embl file and added to the database.
|
52
|
+
def populate_features_and_annotations(embl_ncbi_file)
|
53
|
+
embl_ncbi_file.features.each do |feature|
|
54
|
+
unless feature.feature == "source" || feature.feature == "gene"
|
55
|
+
db_feature = Feature.new
|
56
|
+
db_feature.start = feature.locations.first.from
|
57
|
+
db_feature.end = feature.locations.first.to
|
58
|
+
db_feature.strand = feature.locations.first.strand
|
59
|
+
db_feature.name = feature.feature
|
60
|
+
db_feature.save
|
61
|
+
puts "populated #{db_feature.name}, start: #{db_feature.start}, end: #{db_feature.end}, strand: #{db_feature.strand} for feature: #{db_feature.id}"
|
62
|
+
# Populate the Annotation table with qualifier information from the genbank file
|
63
|
+
feature.qualifiers.each do |qualifier|
|
64
|
+
a = Annotation.new
|
65
|
+
a.qualifier = qualifier.qualifier
|
66
|
+
a.value = qualifier.value
|
67
|
+
a.save
|
68
|
+
db_feature.annotations << a
|
69
|
+
puts "populated #{a.qualifier} for feature: #{db_feature.id}"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
#This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
|
77
|
+
# It requires the strain_names as array and the output (vcf file) from mpileup-snp identification algorithm.
|
78
|
+
|
79
|
+
def populate_snps_alleles_genotypes(strain_names, vcf_file)
|
80
|
+
strains = Array.new
|
81
|
+
strain_names.each do |strain_name|
|
82
|
+
strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
|
83
|
+
strains << strain
|
84
|
+
end
|
85
|
+
|
86
|
+
# open vcf file and parse each line
|
87
|
+
File.open(vcf_file) do |f|
|
88
|
+
# header names
|
89
|
+
header = f.gets
|
90
|
+
header2 = f.gets.chomp
|
91
|
+
column_headings = header2.split("\t")
|
92
|
+
sample_names = column_headings[9..-1]
|
93
|
+
|
94
|
+
good_snps = 0
|
95
|
+
# start parsing snps
|
96
|
+
while line = f.gets
|
97
|
+
details = line.split("\t")
|
98
|
+
ref = details[0]
|
99
|
+
ref_pos = details[1]
|
100
|
+
ref_base = details[3]
|
101
|
+
snp_base = details[4]
|
102
|
+
snp_qual = details [5]
|
103
|
+
samples = details[9..-1]
|
104
|
+
|
105
|
+
genotypes = samples.map do |s|
|
106
|
+
pl, gt, gq = s.chomp.split(":")
|
107
|
+
gt
|
108
|
+
end
|
109
|
+
|
110
|
+
genotypes_qualities = samples.map do |s|
|
111
|
+
pl, gt, gq = s.chomp.split(":")
|
112
|
+
gq
|
113
|
+
end
|
114
|
+
|
115
|
+
high_quality_variant_genotypes = Array.new # this will be filled with the indicies of genotypes that are "1/1" and have a quality >= 30
|
116
|
+
variant_genotypes = Array.new
|
117
|
+
genotypes.each_with_index do |gt, index|
|
118
|
+
if gt == "1/1"
|
119
|
+
variant_genotypes << index
|
120
|
+
if genotypes_qualities[index].to_i >= 30
|
121
|
+
high_quality_variant_genotypes << index
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
if snp_qual.to_i >= opts[:cuttoff] && genotypes.include?("1/1") && ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size # first condition checks the overall quality of the SNP is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant type with quality >= 30 and informative SNP
|
127
|
+
if genotypes.include?("0/0") && !genotypes.include?("0/1") # exclude SNPs which are all 1/1 i.e something strange about ref and those which have confusing heterozygote 0/1s
|
128
|
+
good_snps +=1
|
129
|
+
# puts good_snps
|
130
|
+
#create snp
|
131
|
+
s = Snp.new
|
132
|
+
s.ref_pos = ref_pos
|
133
|
+
s.save
|
134
|
+
puts "Adding Reference SNP position: #{ref_pos}"
|
135
|
+
|
136
|
+
# create ref allele
|
137
|
+
ref_allele = Allele.new
|
138
|
+
ref_allele.base = ref_base
|
139
|
+
ref_allele.snp = s
|
140
|
+
ref_allele.save
|
141
|
+
|
142
|
+
puts "Adding Reference SNP base: #{ref_base}"
|
143
|
+
|
144
|
+
s.reference_allele = ref_allele
|
145
|
+
s.save
|
146
|
+
|
147
|
+
# create snp allele
|
148
|
+
snp_allele = Allele.new
|
149
|
+
snp_allele.base = snp_base
|
150
|
+
snp_allele.snp = s
|
151
|
+
snp_allele.save
|
152
|
+
|
153
|
+
puts "Adding SNP base: #{snp_base}"
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
genotypes.each_with_index do |gt, index|
|
158
|
+
genotype = Genotype.new
|
159
|
+
genotype.strain = strains[index]
|
160
|
+
puts index if strains[index].nil?
|
161
|
+
# print "#{gt}(#{genotypes_qualities[index]}) "
|
162
|
+
if gt == "0/0" # wild type
|
163
|
+
genotype.allele = ref_allele
|
164
|
+
elsif gt == "1/1" # snp type
|
165
|
+
genotype.allele = snp_allele
|
166
|
+
else
|
167
|
+
puts "Strange SNP #{gt}"
|
168
|
+
end
|
169
|
+
genotype.save
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
end
|
175
|
+
end
|
176
|
+
#Here we link the features to snps.
|
177
|
+
Snp.all.each do |snp|
|
178
|
+
x = Feature.where("features.start <= ? AND features.end >= ?", snp.ref_pos, snp.ref_pos).first
|
179
|
+
snp.feature = x
|
180
|
+
snp.save
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
|
185
|
+
#puts opts[:name]
|
186
|
+
# Enter the name of your database
|
187
|
+
establish_connection(opts[:name])
|
188
|
+
|
189
|
+
# # Schema will run here
|
190
|
+
db_schema
|
191
|
+
|
192
|
+
# path for embl file here
|
193
|
+
#path_for_embl_file = ask("Please enter the full path for the embl reference file")
|
194
|
+
genome_sequence = Bio::FlatFile.open(Bio::EMBL,opts[:reference]).next_entry
|
195
|
+
|
196
|
+
# # path for vcf file here
|
197
|
+
vcf_mpileup_file = opts[:vcf]
|
198
|
+
|
199
|
+
|
200
|
+
# # The populate_strains method populates the strains in the db. It uses the strain names in array.
|
201
|
+
populate_strains(strains)
|
202
|
+
|
203
|
+
# # The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
|
204
|
+
populate_features_and_annotations(genome_sequence)
|
205
|
+
|
206
|
+
# #The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
|
207
|
+
populate_snps_alleles_genotypes(strains, vcf_mpileup_file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snp-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,44 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2011-11-
|
13
|
+
date: 2011-11-30 00:00:00.000000000Z
|
14
14
|
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activerecord
|
17
|
+
requirement: &2159603360 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2159603360
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: bio
|
28
|
+
requirement: &2159602700 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *2159602700
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: slop
|
39
|
+
requirement: &2159602100 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
type: :runtime
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *2159602100
|
15
48
|
- !ruby/object:Gem::Dependency
|
16
49
|
name: rspec
|
17
|
-
requirement: &
|
50
|
+
requirement: &2159601440 !ruby/object:Gem::Requirement
|
18
51
|
none: false
|
19
52
|
requirements:
|
20
53
|
- - ~>
|
@@ -22,10 +55,10 @@ dependencies:
|
|
22
55
|
version: 2.3.0
|
23
56
|
type: :development
|
24
57
|
prerelease: false
|
25
|
-
version_requirements: *
|
58
|
+
version_requirements: *2159601440
|
26
59
|
- !ruby/object:Gem::Dependency
|
27
60
|
name: bundler
|
28
|
-
requirement: &
|
61
|
+
requirement: &2159600840 !ruby/object:Gem::Requirement
|
29
62
|
none: false
|
30
63
|
requirements:
|
31
64
|
- - ~>
|
@@ -33,10 +66,10 @@ dependencies:
|
|
33
66
|
version: 1.0.0
|
34
67
|
type: :development
|
35
68
|
prerelease: false
|
36
|
-
version_requirements: *
|
69
|
+
version_requirements: *2159600840
|
37
70
|
- !ruby/object:Gem::Dependency
|
38
71
|
name: jeweler
|
39
|
-
requirement: &
|
72
|
+
requirement: &2159600240 !ruby/object:Gem::Requirement
|
40
73
|
none: false
|
41
74
|
requirements:
|
42
75
|
- - ~>
|
@@ -44,10 +77,10 @@ dependencies:
|
|
44
77
|
version: 1.6.4
|
45
78
|
type: :development
|
46
79
|
prerelease: false
|
47
|
-
version_requirements: *
|
80
|
+
version_requirements: *2159600240
|
48
81
|
- !ruby/object:Gem::Dependency
|
49
82
|
name: rcov
|
50
|
-
requirement: &
|
83
|
+
requirement: &2159599640 !ruby/object:Gem::Requirement
|
51
84
|
none: false
|
52
85
|
requirements:
|
53
86
|
- - ! '>='
|
@@ -55,10 +88,11 @@ dependencies:
|
|
55
88
|
version: '0'
|
56
89
|
type: :development
|
57
90
|
prerelease: false
|
58
|
-
version_requirements: *
|
91
|
+
version_requirements: *2159599640
|
59
92
|
description: Use the snp-search toolset to query the SNP database
|
60
93
|
email: ali.al-shahib@hpa.org.uk
|
61
|
-
executables:
|
94
|
+
executables:
|
95
|
+
- snp-search
|
62
96
|
extensions: []
|
63
97
|
extra_rdoc_files:
|
64
98
|
- LICENSE.txt
|
@@ -73,12 +107,13 @@ files:
|
|
73
107
|
- README.rdoc
|
74
108
|
- Rakefile
|
75
109
|
- VERSION
|
76
|
-
-
|
77
|
-
- lib/
|
78
|
-
- lib/
|
79
|
-
- lib/
|
80
|
-
- lib/
|
110
|
+
- bin/snp-search
|
111
|
+
- lib/snp-search/snp-search.rb
|
112
|
+
- lib/snp-search/snp_db_connection.rb
|
113
|
+
- lib/snp-search/snp_db_models.rb
|
114
|
+
- lib/snp-search/snp_db_schema.rb
|
81
115
|
- snp-search.gemspec
|
116
|
+
- snp-search_test.rb
|
82
117
|
- spec/snp-search_spec.rb
|
83
118
|
- spec/spec_helper.rb
|
84
119
|
homepage: http://github.com/hpa-bioinformatics/snp-search
|
@@ -96,7 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
96
131
|
version: '0'
|
97
132
|
segments:
|
98
133
|
- 0
|
99
|
-
hash:
|
134
|
+
hash: -4259420816018168147
|
100
135
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
136
|
none: false
|
102
137
|
requirements:
|
data/lib/user_entry_file.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'snp-search'
|
2
|
-
|
3
|
-
#path for embl file here
|
4
|
-
genome_sequence = Bio::FlatFile.open(Bio::EMBL, "path_for_embl_file_here").next_entry
|
5
|
-
|
6
|
-
#path for vcf file here
|
7
|
-
vcf_mpileup_file = "path_for_vcf_file_here"
|
8
|
-
|
9
|
-
#array of strain names here
|
10
|
-
strains = ["STRAIN_NAME_1", "STRAIN_NAME_2"]
|
11
|
-
|
12
|
-
# Thats it, you job is done here.
|
13
|
-
|
14
|
-
# The populate_strains method populates the strains in the db. It uses the strain names in array.
|
15
|
-
populate_strains(strains)
|
16
|
-
|
17
|
-
# The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
|
18
|
-
populate_features_and_annotations(genome_sequence)
|
19
|
-
|
20
|
-
# The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
|
21
|
-
populate_snps_alleles_genotypes(strains, vcf_mpileup_file)
|