snp-search 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.0
1
+ 0.4.0
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "snp-search"
8
- s.version = "0.3.0"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ali Al-Shahib", "Anthony Underwood"]
@@ -28,12 +28,11 @@ Gem::Specification.new do |s|
28
28
  "Rakefile",
29
29
  "VERSION",
30
30
  "bin/snp-search",
31
- "lib/snp-search/snp-search.rb",
32
- "lib/snp-search/snp_db_connection.rb",
33
- "lib/snp-search/snp_db_models.rb",
34
- "lib/snp-search/snp_db_schema.rb",
31
+ "lib/snp-search.rb",
32
+ "lib/snp_db_connection.rb",
33
+ "lib/snp_db_models.rb",
34
+ "lib/snp_db_schema.rb",
35
35
  "snp-search.gemspec",
36
- "snp-search_test.rb",
37
36
  "spec/snp-search_spec.rb",
38
37
  "spec/spec_helper.rb"
39
38
  ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snp-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ date: 2011-11-30 00:00:00.000000000Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
17
- requirement: &2159603360 !ruby/object:Gem::Requirement
17
+ requirement: &2155288080 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2159603360
25
+ version_requirements: *2155288080
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: bio
28
- requirement: &2159602700 !ruby/object:Gem::Requirement
28
+ requirement: &2155287480 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2159602700
36
+ version_requirements: *2155287480
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: slop
39
- requirement: &2159602100 !ruby/object:Gem::Requirement
39
+ requirement: &2155286880 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: '0'
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *2159602100
47
+ version_requirements: *2155286880
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &2159601440 !ruby/object:Gem::Requirement
50
+ requirement: &2155286300 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.3.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *2159601440
58
+ version_requirements: *2155286300
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: bundler
61
- requirement: &2159600840 !ruby/object:Gem::Requirement
61
+ requirement: &2155285700 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: 1.0.0
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *2159600840
69
+ version_requirements: *2155285700
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: jeweler
72
- requirement: &2159600240 !ruby/object:Gem::Requirement
72
+ requirement: &2155285120 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ~>
@@ -77,10 +77,10 @@ dependencies:
77
77
  version: 1.6.4
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *2159600240
80
+ version_requirements: *2155285120
81
81
  - !ruby/object:Gem::Dependency
82
82
  name: rcov
83
- requirement: &2159599640 !ruby/object:Gem::Requirement
83
+ requirement: &2155284520 !ruby/object:Gem::Requirement
84
84
  none: false
85
85
  requirements:
86
86
  - - ! '>='
@@ -88,7 +88,7 @@ dependencies:
88
88
  version: '0'
89
89
  type: :development
90
90
  prerelease: false
91
- version_requirements: *2159599640
91
+ version_requirements: *2155284520
92
92
  description: Use the snp-search toolset to query the SNP database
93
93
  email: ali.al-shahib@hpa.org.uk
94
94
  executables:
@@ -108,12 +108,11 @@ files:
108
108
  - Rakefile
109
109
  - VERSION
110
110
  - bin/snp-search
111
- - lib/snp-search/snp-search.rb
112
- - lib/snp-search/snp_db_connection.rb
113
- - lib/snp-search/snp_db_models.rb
114
- - lib/snp-search/snp_db_schema.rb
111
+ - lib/snp-search.rb
112
+ - lib/snp_db_connection.rb
113
+ - lib/snp_db_models.rb
114
+ - lib/snp_db_schema.rb
115
115
  - snp-search.gemspec
116
- - snp-search_test.rb
117
116
  - spec/snp-search_spec.rb
118
117
  - spec/spec_helper.rb
119
118
  homepage: http://github.com/hpa-bioinformatics/snp-search
@@ -131,7 +130,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
131
130
  version: '0'
132
131
  segments:
133
132
  - 0
134
- hash: -4259420816018168147
133
+ hash: 3021040479965194059
135
134
  required_rubygems_version: !ruby/object:Gem::Requirement
136
135
  none: false
137
136
  requirements:
@@ -1,207 +0,0 @@
1
- require 'rubygems'
2
- require 'bio'
3
- require 'snp_db_models'
4
- require 'snp_db_connection'
5
- require 'snp_db_models'
6
- require 'snp_db_schema'
7
- require 'highline/import'
8
- require 'pp'
9
- require 'slop'
10
-
11
-
12
- opts = Slop.new :help do
13
- banner "ruby snp-search.rb [OPTIONS]"
14
-
15
- on :v, :version, 'Display the version of App' do
16
- puts 'Version 1.5!'
17
- exit
18
- end
19
-
20
- on :V, :verbose, 'Enable verbose mode'
21
- on :n, :name=, 'Name of database', true
22
- on :r, :reference=, 'Path for the reference database', true
23
- on :f, :vcf=, 'Path for the .vcf file', true
24
- on :s, :strain, 'Path for the list of strains text file', true
25
- on :c, :cuttoff, 'cuttoff for SNP quality', true
26
- on_empty do
27
- puts help
28
- end
29
- end
30
-
31
- opts.parse
32
-
33
- strains = []
34
- File.read(opts[:strain]).each_line do |line|
35
- strains << line.chop
36
- end
37
-
38
-
39
- # A method to populate the strain names in the Strain table. strain_names is an array of strain names.
40
- def populate_strains(strain_names)
41
- strain_names.each do |strain|
42
- s = Strain.new
43
- s.name = strain
44
- s.save
45
- end
46
- end
47
-
48
- # A method to populate the database with the features (genes etc) and the annotations from the embl file.
49
- # We include all features that are not 'source' or 'gene' as they are repetitive info. 'CDS' is the gene.
50
- # The annotation table includes also the start and end coordinates of the CDS. The strand is also included. the 'locations' method is defined in bioruby under genbank. It must be required at the top (bio).
51
- # Also, the qualifier and value are extracted from the embl file and added to the database.
52
- def populate_features_and_annotations(embl_ncbi_file)
53
- embl_ncbi_file.features.each do |feature|
54
- unless feature.feature == "source" || feature.feature == "gene"
55
- db_feature = Feature.new
56
- db_feature.start = feature.locations.first.from
57
- db_feature.end = feature.locations.first.to
58
- db_feature.strand = feature.locations.first.strand
59
- db_feature.name = feature.feature
60
- db_feature.save
61
- puts "populated #{db_feature.name}, start: #{db_feature.start}, end: #{db_feature.end}, strand: #{db_feature.strand} for feature: #{db_feature.id}"
62
- # Populate the Annotation table with qualifier information from the genbank file
63
- feature.qualifiers.each do |qualifier|
64
- a = Annotation.new
65
- a.qualifier = qualifier.qualifier
66
- a.value = qualifier.value
67
- a.save
68
- db_feature.annotations << a
69
- puts "populated #{a.qualifier} for feature: #{db_feature.id}"
70
- end
71
- end
72
- end
73
- end
74
-
75
-
76
- #This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
77
- # It requires the strain_names as array and the output (vcf file) from mpileup-snp identification algorithm.
78
-
79
- def populate_snps_alleles_genotypes(strain_names, vcf_file)
80
- strains = Array.new
81
- strain_names.each do |strain_name|
82
- strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
83
- strains << strain
84
- end
85
-
86
- # open vcf file and parse each line
87
- File.open(vcf_file) do |f|
88
- # header names
89
- header = f.gets
90
- header2 = f.gets.chomp
91
- column_headings = header2.split("\t")
92
- sample_names = column_headings[9..-1]
93
-
94
- good_snps = 0
95
- # start parsing snps
96
- while line = f.gets
97
- details = line.split("\t")
98
- ref = details[0]
99
- ref_pos = details[1]
100
- ref_base = details[3]
101
- snp_base = details[4]
102
- snp_qual = details [5]
103
- samples = details[9..-1]
104
-
105
- genotypes = samples.map do |s|
106
- pl, gt, gq = s.chomp.split(":")
107
- gt
108
- end
109
-
110
- genotypes_qualities = samples.map do |s|
111
- pl, gt, gq = s.chomp.split(":")
112
- gq
113
- end
114
-
115
- high_quality_variant_genotypes = Array.new # this will be filled with the indicies of genotypes that are "1/1" and have a quality >= 30
116
- variant_genotypes = Array.new
117
- genotypes.each_with_index do |gt, index|
118
- if gt == "1/1"
119
- variant_genotypes << index
120
- if genotypes_qualities[index].to_i >= 30
121
- high_quality_variant_genotypes << index
122
- end
123
- end
124
- end
125
-
126
- if snp_qual.to_i >= opts[:cuttoff] && genotypes.include?("1/1") && ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size # first condition checks the overall quality of the SNP is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant type with quality >= 30 and informative SNP
127
- if genotypes.include?("0/0") && !genotypes.include?("0/1") # exclude SNPs which are all 1/1 i.e something strange about ref and those which have confusing heterozygote 0/1s
128
- good_snps +=1
129
- # puts good_snps
130
- #create snp
131
- s = Snp.new
132
- s.ref_pos = ref_pos
133
- s.save
134
- puts "Adding Reference SNP position: #{ref_pos}"
135
-
136
- # create ref allele
137
- ref_allele = Allele.new
138
- ref_allele.base = ref_base
139
- ref_allele.snp = s
140
- ref_allele.save
141
-
142
- puts "Adding Reference SNP base: #{ref_base}"
143
-
144
- s.reference_allele = ref_allele
145
- s.save
146
-
147
- # create snp allele
148
- snp_allele = Allele.new
149
- snp_allele.base = snp_base
150
- snp_allele.snp = s
151
- snp_allele.save
152
-
153
- puts "Adding SNP base: #{snp_base}"
154
-
155
-
156
-
157
- genotypes.each_with_index do |gt, index|
158
- genotype = Genotype.new
159
- genotype.strain = strains[index]
160
- puts index if strains[index].nil?
161
- # print "#{gt}(#{genotypes_qualities[index]}) "
162
- if gt == "0/0" # wild type
163
- genotype.allele = ref_allele
164
- elsif gt == "1/1" # snp type
165
- genotype.allele = snp_allele
166
- else
167
- puts "Strange SNP #{gt}"
168
- end
169
- genotype.save
170
- end
171
- end
172
- end
173
-
174
- end
175
- end
176
- #Here we link the features to snps.
177
- Snp.all.each do |snp|
178
- x = Feature.where("features.start <= ? AND features.end >= ?", snp.ref_pos, snp.ref_pos).first
179
- snp.feature = x
180
- snp.save
181
- end
182
- end
183
-
184
-
185
- #puts opts[:name]
186
- # Enter the name of your database
187
- establish_connection(opts[:name])
188
-
189
- # # Schema will run here
190
- db_schema
191
-
192
- # path for embl file here
193
- #path_for_embl_file = ask("Please enter the full path for the embl reference file")
194
- genome_sequence = Bio::FlatFile.open(Bio::EMBL,opts[:reference]).next_entry
195
-
196
- # # path for vcf file here
197
- vcf_mpileup_file = opts[:vcf]
198
-
199
-
200
- # # The populate_strains method populates the strains in the db. It uses the strain names in array.
201
- populate_strains(strains)
202
-
203
- # # The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
204
- populate_features_and_annotations(genome_sequence)
205
-
206
- # #The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
207
- populate_snps_alleles_genotypes(strains, vcf_mpileup_file)