snp-search 0.17.0 → 0.19.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +30 -1
- data/VERSION +1 -1
- data/bin/snp-search +27 -8
- data/examples/example1.rb +92 -0
- data/examples/example2.rb +61 -0
- data/lib/snp_db_connection.rb +2 -0
- data/snp-search.gemspec +4 -2
- metadata +19 -17
data/README.rdoc
CHANGED
@@ -10,7 +10,12 @@ To install snp-search, do
|
|
10
10
|
|
11
11
|
== Requirements
|
12
12
|
|
13
|
-
|
13
|
+
Not much, you just need:
|
14
|
+
|
15
|
+
* Unix. Installing snp-search will also install all the necessary gems for you from Rubygems. Note that Rubygems requires admin privileges; if you do not have admin privileges, we suggest you install RVM (http://beginrescueend.com/rvm/install/) and then run gem install snp-search.
|
16
|
+
* Ruby version 1.8.7 or above.
|
17
|
+
|
18
|
+
That's it!
|
14
19
|
|
15
20
|
== Running snp-search
|
16
21
|
|
@@ -49,6 +54,30 @@ The output is your database in sqlite3 format. If you like to view your table(s
|
|
49
54
|
|
50
55
|
Alternatively, you may download a SQL tool to see a GUI of your database (e.g. SQLite sorcerer)
|
51
56
|
|
57
|
+
== Examples
|
58
|
+
|
59
|
+
We have included two example queries that you may find useful:
|
60
|
+
|
61
|
+
* Example1: This script queries the database and selects all genes except the phage genes. The output is a FASTA file of the genes. This is a way of removing a set of genes that are not needed for the SNP analysis. You may use this script to do other SQL queries that result in a FASTA output.
|
62
|
+
|
63
|
+
Usage:
|
64
|
+
|
65
|
+
ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta
|
66
|
+
|
67
|
+
* Example2: This script queries the database and selects the number of unique SNPs within the list of the strains/samples provided. The output is the number of unique SNPs.
|
68
|
+
|
69
|
+
Usage:
|
70
|
+
|
71
|
+
ruby example2.rb -d your_db_name.sqlite3 -s list_of_your_species.txt
|
72
|
+
|
73
|
+
|
74
|
+
== Contact
|
75
|
+
|
76
|
+
If you have any comments, questions or suggestions, please email
|
77
|
+
ali.al-shahib@hpa.org.uk
|
78
|
+
or
|
79
|
+
anthony.underwood@hpa.org.uk
|
80
|
+
|
52
81
|
Have fun snp-searching!
|
53
82
|
|
54
83
|
== Copyright
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.17.0
|
1
|
+
0.19.0
|
data/bin/snp-search
CHANGED
@@ -2,20 +2,20 @@ require 'snp-search'
|
|
2
2
|
require 'snp_db_connection'
|
3
3
|
require 'snp_db_models'
|
4
4
|
require 'snp_db_schema'
|
5
|
-
require 'slop'
|
6
|
-
gem "slop", "~> 2.4.0"
|
7
5
|
|
6
|
+
gem "slop", "~> 2.4.0"
|
7
|
+
require 'slop'
|
8
8
|
|
9
9
|
opts = Slop.new :help do
|
10
10
|
banner "ruby snp-search [OPTIONS]"
|
11
11
|
|
12
12
|
on :V, :verbose, 'Enable verbose mode'
|
13
|
-
on :n, :name=, 'Name of database', :default => 'snp_db.sqlite3'
|
14
|
-
on :r, :reference_file=, 'Reference genome file, in gbk or embl file format', true
|
15
|
-
on :v, :vcf_file=, '.vcf file', true
|
16
|
-
on :s, :strain=, 'text file with a list of strains/samples', true
|
17
|
-
on :c, :cuttoff_snp=, 'SNP quality cutoff', :default => 90
|
18
|
-
on :t, :cuttoff_genotype=, 'Genotype quality cutoff', :default => 30
|
13
|
+
on :n, :name=, 'Name of database, (default: snp_db.sqlite3)', :default => 'snp_db.sqlite3'
|
14
|
+
on :r, :reference_file=, 'Reference genome file, in gbk or embl file format, Required', true
|
15
|
+
on :v, :vcf_file=, '.vcf file, Required', true
|
16
|
+
on :s, :strain=, 'text file with a list of strains/samples, Required', true
|
17
|
+
on :c, :cuttoff_snp=, 'SNP quality cutoff, (default = 90)', :default => 90
|
18
|
+
on :t, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :default => 30
|
19
19
|
|
20
20
|
on_empty do
|
21
21
|
puts help
|
@@ -23,6 +23,25 @@ opts = Slop.new :help do
|
|
23
23
|
end
|
24
24
|
opts.parse
|
25
25
|
|
26
|
+
puts "You must supply the -r option, it's a required field" and exit unless opts[:reference_file]
|
27
|
+
puts "You must supply the -v option, it's a required field" and exit unless opts[:vcf_file]
|
28
|
+
puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
|
29
|
+
|
30
|
+
begin
|
31
|
+
puts "#{opts[:reference_file]} file does not exist!" and exit unless File.exist?(opts[:reference_file])
|
32
|
+
rescue
|
33
|
+
end
|
34
|
+
|
35
|
+
begin
|
36
|
+
puts "#{opts[:vcf_file]} file does not exist!" and exit unless File.exist?(opts[:vcf_file])
|
37
|
+
rescue
|
38
|
+
end
|
39
|
+
|
40
|
+
begin
|
41
|
+
puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
|
42
|
+
rescue
|
43
|
+
end
|
44
|
+
|
26
45
|
|
27
46
|
begin
|
28
47
|
strains = []
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# This query script selects all genes except the 'phage' genes from the database and writes the result to a FASTA file.
|
2
|
+
# Only use this script once your database has been fully populated.
|
3
|
+
# Usage: ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta
|
4
|
+
# You may use this script to run other SQL queries that result in a FASTA output. Just replace the 'snps' SQL query below with your own query.
|
5
|
+
require 'snp_db_connection'
|
6
|
+
require 'snp_db_models'
|
7
|
+
require 'snp_db_schema'
|
8
|
+
gem "slop", "~> 2.4.0"
|
9
|
+
require 'slop'
|
10
|
+
|
11
|
+
opts = Slop.new :help do
|
12
|
+
banner "ruby query.rb [OPTIONS]"
|
13
|
+
|
14
|
+
on :V, :verbose, 'Enable verbose mode'
|
15
|
+
on :d, :database=, 'The name of the database you like to query', true
|
16
|
+
on :o, :outfile=, 'output file, in fasta format', true
|
17
|
+
on :s, :strain=, 'The strains/samples you like to query', true
|
18
|
+
|
19
|
+
on_empty do
|
20
|
+
puts help
|
21
|
+
end
|
22
|
+
end
|
23
|
+
opts.parse
|
24
|
+
|
25
|
+
puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
|
26
|
+
puts "You must supply the -d option, it's a required field" and exit unless opts[:database]
|
27
|
+
|
28
|
+
begin
|
29
|
+
puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database])
|
30
|
+
rescue
|
31
|
+
end
|
32
|
+
|
33
|
+
begin
|
34
|
+
puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
|
35
|
+
rescue
|
36
|
+
end
|
37
|
+
|
38
|
+
establish_connection(opts[:database])
|
39
|
+
|
40
|
+
begin
|
41
|
+
strains = []
|
42
|
+
File.read(opts[:strain]).each_line do |line|
|
43
|
+
strains << line.chop
|
44
|
+
end
|
45
|
+
|
46
|
+
# Enter the name of your database
|
47
|
+
|
48
|
+
outfile = File.open(opts[:outfile], "w")
|
49
|
+
|
50
|
+
|
51
|
+
# create a sequence hash
|
52
|
+
sequence_hash = Hash.new
|
53
|
+
|
54
|
+
# create an array of strains
|
55
|
+
|
56
|
+
# hash key is strain_name, loop through strain_names
|
57
|
+
# create an empty array
|
58
|
+
strains.each do |strain_name|
|
59
|
+
sequence_hash[strain_name] = Array.new
|
60
|
+
end
|
61
|
+
|
62
|
+
snps = Snp.find_by_sql("SELECT snps.* FROM snps
|
63
|
+
INNER JOIN features
|
64
|
+
ON features.id = snps.feature_id
|
65
|
+
WHERE features.id IN
|
66
|
+
(select features.id from features
|
67
|
+
WHERE id NOT IN
|
68
|
+
(select distinct features.id FROM features
|
69
|
+
INNER JOIN annotations ON
|
70
|
+
annotations.feature_id = features.id
|
71
|
+
WHERE annotations.value LIKE '%phage%'))")
|
72
|
+
|
73
|
+
|
74
|
+
#puts snps.size
|
75
|
+
puts "Your Query is submitted and is being processed......."
|
76
|
+
snps.each do |snp|
|
77
|
+
#break if i == 100
|
78
|
+
snp.alleles.each do |allele|
|
79
|
+
allele.genotypes.each do |genotype|
|
80
|
+
# puts genotype.inspect
|
81
|
+
sequence_hash[genotype.strain.name] << allele.base
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
strains.each do |sn|
|
87
|
+
outfile.print ">#{sn}\n" , sequence_hash[sn].join("")
|
88
|
+
outfile.puts
|
89
|
+
end
|
90
|
+
|
91
|
+
rescue
|
92
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# This query script finds the unique SNPs among the list of strains provided.
|
2
|
+
# Only use this script once your database has been fully populated.
|
3
|
+
# Usage: ruby example2.rb -d your_db_name.sqlite3 -s list_of_your_species.txt
|
4
|
+
# Output is the number of unique snps in the list of your strains provided in the -s option.
|
5
|
+
# You may use this script to run other SQL queries. Just replace the SQL query below with your own query.
|
6
|
+
|
7
|
+
require 'snp_db_models'
|
8
|
+
gem "slop", "~> 2.4.0"
|
9
|
+
require 'slop'
|
10
|
+
|
11
|
+
opts = Slop.new :help do
|
12
|
+
banner "ruby query.rb [OPTIONS]"
|
13
|
+
|
14
|
+
on :V, :verbose, 'Enable verbose mode'
|
15
|
+
on :d, :database=, 'The name of the database you like to query', true
|
16
|
+
on :s, :strain=, 'The strains/samples you like to query', true
|
17
|
+
|
18
|
+
on_empty do
|
19
|
+
puts help
|
20
|
+
end
|
21
|
+
end
|
22
|
+
opts.parse
|
23
|
+
|
24
|
+
puts "You must supply the -d option, it's a required field" and exit unless opts[:database]
|
25
|
+
puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
|
26
|
+
|
27
|
+
begin
|
28
|
+
puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database])
|
29
|
+
rescue
|
30
|
+
end
|
31
|
+
|
32
|
+
begin
|
33
|
+
puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
|
34
|
+
rescue
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
establish_connection(opts[:database])
|
39
|
+
|
40
|
+
begin
|
41
|
+
strains = []
|
42
|
+
File.read(opts[:strain]).each_line do |line|
|
43
|
+
strains << line.chop
|
44
|
+
end
|
45
|
+
|
46
|
+
def find_shared_snps(strain_names)
|
47
|
+
*strain_names = strain_names
|
48
|
+
|
49
|
+
where_statement = strain_names.collect{|strain_name| "strains.name = '#{strain_name}' OR "}.join("").sub(/ OR $/, "")
|
50
|
+
|
51
|
+
return Snp.find_by_sql("SELECT * FROM (SELECT features.* from features INNER JOIN snps ON features.id = snps.feature_id INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id WHERE (#{where_statement}) AND alleles.id <> snps.reference_allele_id AND (SELECT COUNT(*) from snps AS s INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id WHERE alleles.id <> snps.reference_allele_id and s.id = snps.id) = #{strain_names.size} GROUP BY snps.id HAVING COUNT(*) = #{strain_names.size})");
|
52
|
+
end
|
53
|
+
|
54
|
+
gas_snps = find_shared_snps(strains)
|
55
|
+
|
56
|
+
gas_snps.each do |snp|
|
57
|
+
puts "The number of unique snps are #{snp.id}"
|
58
|
+
end
|
59
|
+
|
60
|
+
rescue
|
61
|
+
end
|
data/lib/snp_db_connection.rb
CHANGED
data/snp-search.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "snp-search"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.19.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ali Al-Shahib", "Anthony Underwood"]
|
12
|
-
s.date = "2011-12-
|
12
|
+
s.date = "2011-12-06"
|
13
13
|
s.description = "Use the snp-search toolset to query the SNP database"
|
14
14
|
s.email = "ali.al-shahib@hpa.org.uk"
|
15
15
|
s.executables = ["snp-search"]
|
@@ -28,6 +28,8 @@ Gem::Specification.new do |s|
|
|
28
28
|
"Rakefile",
|
29
29
|
"VERSION",
|
30
30
|
"bin/snp-search",
|
31
|
+
"examples/example1.rb",
|
32
|
+
"examples/example2.rb",
|
31
33
|
"lib/snp-search.rb",
|
32
34
|
"lib/snp_db_connection.rb",
|
33
35
|
"lib/snp_db_models.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snp-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.19.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2011-12-
|
13
|
+
date: 2011-12-06 00:00:00.000000000Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activerecord
|
17
|
-
requirement: &
|
17
|
+
requirement: &2158876860 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 3.1.3
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2158876860
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: bio
|
28
|
-
requirement: &
|
28
|
+
requirement: &2158876360 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.4.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2158876360
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: slop
|
39
|
-
requirement: &
|
39
|
+
requirement: &2158875880 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.4.0
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *2158875880
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &2158875340 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.3.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *2158875340
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: bundler
|
61
|
-
requirement: &
|
61
|
+
requirement: &2158874760 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: 1.0.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *2158874760
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: jeweler
|
72
|
-
requirement: &
|
72
|
+
requirement: &2158874240 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
@@ -77,10 +77,10 @@ dependencies:
|
|
77
77
|
version: 1.6.4
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *2158874240
|
81
81
|
- !ruby/object:Gem::Dependency
|
82
82
|
name: rcov
|
83
|
-
requirement: &
|
83
|
+
requirement: &2158873720 !ruby/object:Gem::Requirement
|
84
84
|
none: false
|
85
85
|
requirements:
|
86
86
|
- - ! '>='
|
@@ -88,7 +88,7 @@ dependencies:
|
|
88
88
|
version: '0'
|
89
89
|
type: :development
|
90
90
|
prerelease: false
|
91
|
-
version_requirements: *
|
91
|
+
version_requirements: *2158873720
|
92
92
|
description: Use the snp-search toolset to query the SNP database
|
93
93
|
email: ali.al-shahib@hpa.org.uk
|
94
94
|
executables:
|
@@ -108,6 +108,8 @@ files:
|
|
108
108
|
- Rakefile
|
109
109
|
- VERSION
|
110
110
|
- bin/snp-search
|
111
|
+
- examples/example1.rb
|
112
|
+
- examples/example2.rb
|
111
113
|
- lib/snp-search.rb
|
112
114
|
- lib/snp_db_connection.rb
|
113
115
|
- lib/snp_db_models.rb
|
@@ -130,7 +132,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
130
132
|
version: '0'
|
131
133
|
segments:
|
132
134
|
- 0
|
133
|
-
hash:
|
135
|
+
hash: 339676868583420236
|
134
136
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
137
|
none: false
|
136
138
|
requirements:
|