snp-search 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,6 +3,10 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
 
6
+ gem "activerecord"
7
+ gem "bio"
8
+ gem "slop"
9
+
6
10
  # Add dependencies to develop your gem here.
7
11
  # Include everything needed to run rake, tests, features, etc.
8
12
  group :development do
@@ -10,4 +14,5 @@ group :development do
10
14
  gem "bundler", "~> 1.0.0"
11
15
  gem "jeweler", "~> 1.6.4"
12
16
  gem "rcov", ">= 0"
13
- end
17
+
18
+ end
@@ -1,12 +1,30 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ activemodel (3.1.0)
5
+ activesupport (= 3.1.0)
6
+ bcrypt-ruby (~> 3.0.0)
7
+ builder (~> 3.0.0)
8
+ i18n (~> 0.6)
9
+ activerecord (3.1.0)
10
+ activemodel (= 3.1.0)
11
+ activesupport (= 3.1.0)
12
+ arel (~> 2.2.1)
13
+ tzinfo (~> 0.3.29)
14
+ activesupport (3.1.0)
15
+ multi_json (~> 1.0)
16
+ arel (2.2.1)
17
+ bcrypt-ruby (3.0.0)
18
+ bio (1.4.2)
19
+ builder (3.0.0)
4
20
  diff-lcs (1.1.3)
5
21
  git (1.2.5)
22
+ i18n (0.6.0)
6
23
  jeweler (1.6.4)
7
24
  bundler (~> 1.0)
8
25
  git (>= 1.2.5)
9
26
  rake
27
+ multi_json (1.0.3)
10
28
  rake (0.9.2.2)
11
29
  rcov (0.9.11)
12
30
  rspec (2.3.0)
@@ -17,12 +35,17 @@ GEM
17
35
  rspec-expectations (2.3.0)
18
36
  diff-lcs (~> 1.1.2)
19
37
  rspec-mocks (2.3.0)
38
+ slop (2.4.0)
39
+ tzinfo (0.3.29)
20
40
 
21
41
  PLATFORMS
22
42
  ruby
23
43
 
24
44
  DEPENDENCIES
45
+ activerecord
46
+ bio
25
47
  bundler (~> 1.0.0)
26
48
  jeweler (~> 1.6.4)
27
49
  rcov
28
50
  rspec (~> 2.3.0)
51
+ slop
data/Rakefile CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
21
21
  gem.description = %Q{Use the snp-search toolset to query the SNP database}
22
22
  gem.email = "ali.al-shahib@hpa.org.uk"
23
23
  gem.authors = ["Ali Al-Shahib", "Anthony Underwood"]
24
+ gem.executables = ["snp-search"]
24
25
  # dependencies defined in Gemfile
25
26
  end
26
27
  Jeweler::RubygemsDotOrgTasks.new
@@ -38,10 +39,9 @@ end
38
39
 
39
40
  task :default => :spec
40
41
 
41
- require 'rake/rdoctask'
42
- Rake::RDocTask.new do |rdoc|
42
+ require 'rdoc/task'
43
+ RDoc::Task.new do |rdoc|
43
44
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
-
45
45
  rdoc.rdoc_dir = 'rdoc'
46
46
  rdoc.title = "snp-search #{version}"
47
47
  rdoc.rdoc_files.include('README*')
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
@@ -0,0 +1,63 @@
1
+ require 'snp-search'
2
+ require 'snp_db_connection'
3
+ require 'snp_db_models'
4
+ require 'snp_db_schema'
5
+ require 'slop'
6
+
7
+
8
+
9
# Command-line entry point: builds (or extends) a SNP database from a
# reference sequence file, a VCF file and a text file listing strain names.
opts = Slop.new :help do
  banner "ruby snp-search [OPTIONS]"

  on :V, :verbose, 'Enable verbose mode'
  on :n, :name=, 'Name of database', true
  on :r, :reference_file=, 'Path for the reference database, in gbk or embl file format'
  on :v, :vcf_file=, 'Path for the .vcf file', true
  on :s, :strain, 'Path for the list of strains text file', true
  on :c, :cuttoff_snp=, 'cuttoff for SNP quality'
  on :t, :cuttoff_genotype=, 'cuttoff for genotype quality'
  on_empty do
    puts help
    # Without arguments there is nothing to import; stop here instead of
    # falling through and crashing on the missing file paths.
    exit
  end
end

opts.parse

# Report missing mandatory options by name rather than failing later with a
# TypeError from File.read(nil) / File.open(nil).
[[:name, "-n"], [:vcf_file, "-v"], [:strain, "-s"]].each do |option, flag|
  abort "Missing required option #{flag} (--#{option})" unless opts[option]
end

# One strain name per line. chomp only strips a line terminator (chop would
# also eat the last character of a final line that has no trailing newline).
# Blank lines cannot name a strain and are ignored.
strains = File.readlines(opts[:strain]).map { |line| line.chomp }.reject { |name| name.empty? }

# Validate the reference file type before touching the database so that bad
# input does not leave a freshly created, empty database behind.
ref = opts[:reference_file]
flatfile_class = case ref && guess_sequence_format(ref)
                 when :genbank then Bio::GenBank
                 when :embl then Bio::EMBL
                 end

unless flatfile_class
  puts "All sequence files should be of genbank or embl format"
  exit
end

# Enter the name of your database
establish_connection(opts[:name])

# Schema will run here
db_schema

# Only the first entry of the reference file is used.
sequence_flatfile = Bio::FlatFile.open(flatfile_class, ref).next_entry

# path for vcf file here
vcf_mpileup_file = opts[:vcf_file]

# The populate_strains method populates the strains in the db. It uses the strain names in array.
populate_strains(strains)

# The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
populate_features_and_annotations(sequence_flatfile)

# The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
# NOTE(review): an omitted -c / -t is presumably nil here, which to_i turns
# into 0, i.e. no quality filtering -- confirm that this default is intended.
populate_snps_alleles_genotypes(strains, vcf_mpileup_file, opts[:cuttoff_snp].to_i, opts[:cuttoff_genotype].to_i)
@@ -1,7 +1,19 @@
1
1
  require 'rubygems'
2
2
  require 'bio'
3
3
  require 'snp_db_models'
4
- establish_connection
4
+ #establish_connection
5
+
6
# Guess the format of a reference sequence file from its file extension.
#
# reference_genome - path of the reference file; nil is tolerated.
#
# Returns :genbank for .gbk/.genbank/.gb, :embl for .embl/.emb (the
# comparison is case-insensitive) and nil for anything else.
def guess_sequence_format(reference_genome)
  # File.extname raises TypeError on nil; report "unknown format" instead so
  # the caller's unsupported-format branch can handle a missing path.
  return nil if reference_genome.nil?

  case File.extname(reference_genome).downcase
  when ".gbk", ".genbank", ".gb" then :genbank
  when ".embl", ".emb" then :embl
  end
end
5
17
 
6
18
  # A method to populate the strain names in the Strain table. strain_names is an array of strain names.
7
19
  def populate_strains(strain_names)
@@ -16,8 +28,8 @@ end
16
28
  # We include all features that are not 'source' or 'gene' as they are repetitive info. 'CDS' is the gene.
17
29
  # The annotation table includes also the start and end coordinates of the CDS. The strand is also included. the 'locations' method is defined in bioruby under genbank. It must be required at the top (bio).
18
30
  # Also, the qualifier and value are extracted from the embl file and added to the database.
19
- def populate_features_and_annotations(embl_ncbi_file)
20
- embl_ncbi_file.features.each do |feature|
31
+ def populate_features_and_annotations(sequence_file)
32
+ sequence_file.features.each do |feature|
21
33
  unless feature.feature == "source" || feature.feature == "gene"
22
34
  db_feature = Feature.new
23
35
  db_feature.start = feature.locations.first.from
@@ -43,7 +55,7 @@ end
43
55
  #This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
44
56
  # It requires the strain_names as array and the output (vcf file) from mpileup-snp identification algorithm.
45
57
 
46
- def populate_snps_alleles_genotypes(strain_names, vcf_file)
58
+ def populate_snps_alleles_genotypes(strain_names, vcf_file, cuttoff_snp, cuttoff_genotype)
47
59
  strains = Array.new
48
60
  strain_names.each do |strain_name|
49
61
  strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
@@ -84,13 +96,13 @@ def populate_snps_alleles_genotypes(strain_names, vcf_file)
84
96
  genotypes.each_with_index do |gt, index|
85
97
  if gt == "1/1"
86
98
  variant_genotypes << index
87
- if genotypes_qualities[index].to_i >= 30
99
+ if genotypes_qualities[index].to_i >= cuttoff_genotype
88
100
  high_quality_variant_genotypes << index
89
101
  end
90
102
  end
91
103
  end
92
104
 
93
- if snp_qual.to_i >= 90 && genotypes.include?("1/1") && ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size # first condition checks the overall quality of the SNP is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant type with quality >= 30 and informative SNP
105
+ if snp_qual.to_i >= cuttoff_snp && genotypes.include?("1/1") && ! high_quality_variant_genotypes.empty? && high_quality_variant_genotypes.size == variant_genotypes.size # first condition checks the overall quality of the SNP is >=90, second checks that at least one genome has the 'homozygous' 1/1 variant type with quality >= 30 and informative SNP
94
106
  if genotypes.include?("0/0") && !genotypes.include?("0/1") # exclude SNPs which are all 1/1 i.e something strange about ref and those which have confusing heterozygote 0/1s
95
107
  good_snps +=1
96
108
  # puts good_snps
@@ -1,5 +1,5 @@
1
1
  require 'active_record'
2
- def establish_connection(db_location= "snp.db.sqlite3")
2
+ def establish_connection(db_location)
3
3
  ActiveRecord::Base.establish_connection(
4
4
  :adapter => "sqlite3",
5
5
  :database => db_location,
@@ -1,6 +1,4 @@
1
- require 'snp_db_connection'
2
- establish_connection
3
-
1
+ def db_schema
4
2
  ActiveRecord::Schema.define do
5
3
  unless table_exists? :strains
6
4
  create_table :strains do |t|
@@ -83,4 +81,5 @@ ActiveRecord::Schema.define do
83
81
  unless index_exists? :annotations, :feature_id
84
82
  add_index :annotations, :feature_id
85
83
  end
84
+ end
86
85
  end
@@ -5,13 +5,14 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "snp-search"
8
- s.version = "0.2.0"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ali Al-Shahib", "Anthony Underwood"]
12
- s.date = "2011-11-25"
12
+ s.date = "2011-11-30"
13
13
  s.description = "Use the snp-search toolset to query the SNP database"
14
14
  s.email = "ali.al-shahib@hpa.org.uk"
15
+ s.executables = ["snp-search"]
15
16
  s.extra_rdoc_files = [
16
17
  "LICENSE.txt",
17
18
  "README",
@@ -26,12 +27,13 @@ Gem::Specification.new do |s|
26
27
  "README.rdoc",
27
28
  "Rakefile",
28
29
  "VERSION",
29
- "lib/snp-search.rb",
30
- "lib/snp_db_connection.rb",
31
- "lib/snp_db_models.rb",
32
- "lib/snp_db_schema.rb",
33
- "lib/user_entry_file.rb",
30
+ "bin/snp-search",
31
+ "lib/snp-search/snp-search.rb",
32
+ "lib/snp-search/snp_db_connection.rb",
33
+ "lib/snp-search/snp_db_models.rb",
34
+ "lib/snp-search/snp_db_schema.rb",
34
35
  "snp-search.gemspec",
36
+ "snp-search_test.rb",
35
37
  "spec/snp-search_spec.rb",
36
38
  "spec/spec_helper.rb"
37
39
  ]
@@ -45,17 +47,26 @@ Gem::Specification.new do |s|
45
47
  s.specification_version = 3
46
48
 
47
49
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
+ s.add_runtime_dependency(%q<activerecord>, [">= 0"])
51
+ s.add_runtime_dependency(%q<bio>, [">= 0"])
52
+ s.add_runtime_dependency(%q<slop>, [">= 0"])
48
53
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
49
54
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
50
55
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
51
56
  s.add_development_dependency(%q<rcov>, [">= 0"])
52
57
  else
58
+ s.add_dependency(%q<activerecord>, [">= 0"])
59
+ s.add_dependency(%q<bio>, [">= 0"])
60
+ s.add_dependency(%q<slop>, [">= 0"])
53
61
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
54
62
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
55
63
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
56
64
  s.add_dependency(%q<rcov>, [">= 0"])
57
65
  end
58
66
  else
67
+ s.add_dependency(%q<activerecord>, [">= 0"])
68
+ s.add_dependency(%q<bio>, [">= 0"])
69
+ s.add_dependency(%q<slop>, [">= 0"])
59
70
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
60
71
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
61
72
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
@@ -0,0 +1,207 @@
1
+ require 'rubygems'
2
+ require 'bio'
3
+ require 'snp_db_models'
4
+ require 'snp_db_connection'
5
+ require 'snp_db_models'
6
+ require 'snp_db_schema'
7
+ require 'highline/import'
8
+ require 'pp'
9
+ require 'slop'
10
+
11
+
12
# Command-line options for the stand-alone test script.
opts = Slop.new :help do
  banner "ruby snp-search.rb [OPTIONS]"

  on :v, :version, 'Display the version of App' do
    puts 'Version 1.5!'
    exit
  end

  on :V, :verbose, 'Enable verbose mode'
  on :n, :name=, 'Name of database', true
  on :r, :reference=, 'Path for the reference database', true
  on :f, :vcf=, 'Path for the .vcf file', true
  on :s, :strain, 'Path for the list of strains text file', true
  on :c, :cuttoff, 'cuttoff for SNP quality', true
  on_empty do
    puts help
    # Nothing to do without arguments; stop instead of crashing further down
    # on the missing strain file path.
    exit
  end
end

opts.parse

# One strain name per line. chomp only strips a line terminator (chop would
# also eat the last character of a final line that has no trailing newline).
# Blank lines cannot name a strain and are ignored.
strains = File.readlines(opts[:strain]).map { |line| line.chomp }.reject { |name| name.empty? }
37
+
38
+
39
# Store every entry of strain_names as a row of the Strain table.
#
# strain_names - Array of strain names.
def populate_strains(strain_names)
  strain_names.each do |strain_name|
    Strain.new.tap { |record| record.name = strain_name }.save
  end
end
47
+
48
# Copy the features of a parsed reference entry, and their qualifiers, into
# the Feature and Annotation tables.
#
# 'source' and 'gene' features are skipped because they repeat information
# carried by other features ('CDS' is the gene). Coordinates and strand are
# taken from the first location of each feature.
#
# embl_ncbi_file - parsed EMBL/GenBank entry responding to #features.
def populate_features_and_annotations(embl_ncbi_file)
  embl_ncbi_file.features.each do |entry|
    next if %w[source gene].include?(entry.feature)

    span = entry.locations.first
    record = Feature.new
    record.start = span.from
    record.end = span.to
    record.strand = span.strand
    record.name = entry.feature
    record.save
    puts "populated #{record.name}, start: #{record.start}, end: #{record.end}, strand: #{record.strand} for feature: #{record.id}"

    # One Annotation row per qualifier, attached to the feature just saved.
    entry.qualifiers.each do |qual|
      annotation = Annotation.new
      annotation.qualifier = qual.qualifier
      annotation.value = qual.value
      annotation.save
      record.annotations << annotation
      puts "populated #{annotation.qualifier} for feature: #{record.id}"
    end
  end
end
74
+
75
+
76
# Populate the Snp, Allele and Genotype tables from a VCF file and then link
# every stored SNP to a feature that spans its position.
#
# strain_names     - Array of strain names, in the same order as the sample
#                    columns of the VCF file (genotypes are matched to
#                    strains by position).
# vcf_file         - path of the VCF file (output of the mpileup SNP caller).
# cuttoff_snp      - minimum overall SNP quality (default 90).
# cuttoff_genotype - minimum quality required of every "1/1" genotype
#                    (default 30).
#
# The thresholds are parameters because a method body cannot see the
# script-level `opts` local; the defaults keep two-argument calls working.
#
# Returns the collection returned by Snp.all.
def populate_snps_alleles_genotypes(strain_names, vcf_file, cuttoff_snp = 90, cuttoff_genotype = 30)
  # Strain records in sample-column order; an entry is nil when the lookup
  # finds nothing (its column index is printed further down).
  strains = strain_names.map { |strain_name| Strain.find_by_name(strain_name) }

  # open vcf file and parse each line
  File.open(vcf_file) do |f|
    # The first two lines are always treated as header lines.
    2.times { f.gets }

    # start parsing snps
    while line = f.gets
      # Tolerate additional '#' meta/header lines and blank lines.
      next if line.start_with?("#") || line.strip.empty?

      details = line.split("\t")
      ref_pos = details[1]
      ref_base = details[3]
      snp_base = details[4]
      snp_qual = details[5]
      samples = details[9..-1]

      # Each sample column is colon separated; the second field is read as
      # the genotype and the third as the genotype quality.
      sample_fields = samples.map { |sample| sample.chomp.split(":") }
      genotypes = sample_fields.map { |fields| fields[1] }
      genotypes_qualities = sample_fields.map { |fields| fields[2] }

      variant_genotypes = []                # indices of all "1/1" genotypes
      high_quality_variant_genotypes = []   # ... those meeting cuttoff_genotype
      genotypes.each_with_index do |gt, index|
        next unless gt == "1/1"
        variant_genotypes << index
        high_quality_variant_genotypes << index if genotypes_qualities[index].to_i >= cuttoff_genotype
      end

      # Keep the SNP only if its overall quality is high enough, at least one
      # sample is homozygous variant, and every homozygous variant call is of
      # high quality.
      next unless snp_qual.to_i >= cuttoff_snp &&
                  !high_quality_variant_genotypes.empty? &&
                  high_quality_variant_genotypes.size == variant_genotypes.size

      # exclude SNPs which are all 1/1 i.e something strange about ref and
      # those which have confusing heterozygote 0/1s
      next unless genotypes.include?("0/0") && !genotypes.include?("0/1")

      # create snp
      s = Snp.new
      s.ref_pos = ref_pos
      s.save
      puts "Adding Reference SNP position: #{ref_pos}"

      # create ref allele
      ref_allele = Allele.new
      ref_allele.base = ref_base
      ref_allele.snp = s
      ref_allele.save
      puts "Adding Reference SNP base: #{ref_base}"

      s.reference_allele = ref_allele
      s.save

      # create snp allele
      snp_allele = Allele.new
      snp_allele.base = snp_base
      snp_allele.snp = s
      snp_allele.save
      puts "Adding SNP base: #{snp_base}"

      # one genotype per sample column
      genotypes.each_with_index do |gt, index|
        genotype = Genotype.new
        genotype.strain = strains[index]
        puts index if strains[index].nil?
        if gt == "0/0" # wild type
          genotype.allele = ref_allele
        elsif gt == "1/1" # snp type
          genotype.allele = snp_allele
        else
          # saved without an allele
          puts "Strange SNP #{gt}"
        end
        genotype.save
      end
    end
  end

  # Here we link the features to snps.
  Snp.all.each do |snp|
    x = Feature.where("features.start <= ? AND features.end >= ?", snp.ref_pos, snp.ref_pos).first
    snp.feature = x
    snp.save
  end
end
183
+
184
+
185
# Driver: connect, make sure the schema exists, then load strains, features
# and SNPs in that order.

# Connect to the database named on the command line.
establish_connection(opts[:name])

# Define the schema.
db_schema

# Parse the first entry of the EMBL reference file.
reference_entry = Bio::FlatFile.open(Bio::EMBL, opts[:reference]).next_entry

# Strains come from the strain list file read above.
populate_strains(strains)

# Features and annotations come from the reference entry.
populate_features_and_annotations(reference_entry)

# SNPs, alleles and genotypes come from the VCF file.
populate_snps_alleles_genotypes(strains, opts[:vcf])
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snp-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,44 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2011-11-25 00:00:00.000000000Z
13
+ date: 2011-11-30 00:00:00.000000000Z
14
14
  dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activerecord
17
+ requirement: &2159603360 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *2159603360
26
+ - !ruby/object:Gem::Dependency
27
+ name: bio
28
+ requirement: &2159602700 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: *2159602700
37
+ - !ruby/object:Gem::Dependency
38
+ name: slop
39
+ requirement: &2159602100 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: *2159602100
15
48
  - !ruby/object:Gem::Dependency
16
49
  name: rspec
17
- requirement: &2161702220 !ruby/object:Gem::Requirement
50
+ requirement: &2159601440 !ruby/object:Gem::Requirement
18
51
  none: false
19
52
  requirements:
20
53
  - - ~>
@@ -22,10 +55,10 @@ dependencies:
22
55
  version: 2.3.0
23
56
  type: :development
24
57
  prerelease: false
25
- version_requirements: *2161702220
58
+ version_requirements: *2159601440
26
59
  - !ruby/object:Gem::Dependency
27
60
  name: bundler
28
- requirement: &2161701620 !ruby/object:Gem::Requirement
61
+ requirement: &2159600840 !ruby/object:Gem::Requirement
29
62
  none: false
30
63
  requirements:
31
64
  - - ~>
@@ -33,10 +66,10 @@ dependencies:
33
66
  version: 1.0.0
34
67
  type: :development
35
68
  prerelease: false
36
- version_requirements: *2161701620
69
+ version_requirements: *2159600840
37
70
  - !ruby/object:Gem::Dependency
38
71
  name: jeweler
39
- requirement: &2161701080 !ruby/object:Gem::Requirement
72
+ requirement: &2159600240 !ruby/object:Gem::Requirement
40
73
  none: false
41
74
  requirements:
42
75
  - - ~>
@@ -44,10 +77,10 @@ dependencies:
44
77
  version: 1.6.4
45
78
  type: :development
46
79
  prerelease: false
47
- version_requirements: *2161701080
80
+ version_requirements: *2159600240
48
81
  - !ruby/object:Gem::Dependency
49
82
  name: rcov
50
- requirement: &2161699820 !ruby/object:Gem::Requirement
83
+ requirement: &2159599640 !ruby/object:Gem::Requirement
51
84
  none: false
52
85
  requirements:
53
86
  - - ! '>='
@@ -55,10 +88,11 @@ dependencies:
55
88
  version: '0'
56
89
  type: :development
57
90
  prerelease: false
58
- version_requirements: *2161699820
91
+ version_requirements: *2159599640
59
92
  description: Use the snp-search toolset to query the SNP database
60
93
  email: ali.al-shahib@hpa.org.uk
61
- executables: []
94
+ executables:
95
+ - snp-search
62
96
  extensions: []
63
97
  extra_rdoc_files:
64
98
  - LICENSE.txt
@@ -73,12 +107,13 @@ files:
73
107
  - README.rdoc
74
108
  - Rakefile
75
109
  - VERSION
76
- - lib/snp-search.rb
77
- - lib/snp_db_connection.rb
78
- - lib/snp_db_models.rb
79
- - lib/snp_db_schema.rb
80
- - lib/user_entry_file.rb
110
+ - bin/snp-search
111
+ - lib/snp-search/snp-search.rb
112
+ - lib/snp-search/snp_db_connection.rb
113
+ - lib/snp-search/snp_db_models.rb
114
+ - lib/snp-search/snp_db_schema.rb
81
115
  - snp-search.gemspec
116
+ - snp-search_test.rb
82
117
  - spec/snp-search_spec.rb
83
118
  - spec/spec_helper.rb
84
119
  homepage: http://github.com/hpa-bioinformatics/snp-search
@@ -96,7 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
131
  version: '0'
97
132
  segments:
98
133
  - 0
99
- hash: 3531722145808918413
134
+ hash: -4259420816018168147
100
135
  required_rubygems_version: !ruby/object:Gem::Requirement
101
136
  none: false
102
137
  requirements:
@@ -1,21 +0,0 @@
1
- require 'snp-search'
2
-
3
- #path for embl file here
4
- genome_sequence = Bio::FlatFile.open(Bio::EMBL, "path_for_embl_file_here").next_entry
5
-
6
- #path for vcf file here
7
- vcf_mpileup_file = "path_for_vcf_file_here"
8
-
9
- #array of strain names here
10
- strains = ["STRAIN_NAME_1", "STRAIN_NAME_2"]
11
-
12
- # Thats it, you job is done here.
13
-
14
- # The populate_strains method populates the strains in the db. It uses the strain names in array.
15
- populate_strains(strains)
16
-
17
- # The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
18
- populate_features_and_annotations(genome_sequence)
19
-
20
- # The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes. It uses the strain names (array) and vcf file.
21
- populate_snps_alleles_genotypes(strains, vcf_mpileup_file)