snp-search 0.17.0 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,12 @@ To install snp-search, do
10
10
 
11
11
  == Requirements
12
12
 
13
- Nothing! You just need to run this in Unix and it will install all the necessary gems for you from Rubygems (note that Rubygems requires admin privileges. If you do not have admin privileges then we suggest you install RVM: (http://beginrescueend.com/rvm/install/) and then gem install snp-search)
13
+ Not much, you just need:
14
+
15
+ * Unix. Installing snp-search will also install all the necessary gems for you from Rubygems (note that Rubygems requires admin privileges; if you do not have admin privileges, we suggest you install RVM (http://beginrescueend.com/rvm/install/) and then run gem install snp-search).
16
+ * Ruby version 1.8.7 or above.
17
+
18
+ That's it!
14
19
 
15
20
  == Running snp-search
16
21
 
@@ -49,6 +54,30 @@ The output is your database in sqlite3 format. If you like to view your table(s
49
54
 
50
55
  Alternatively, you may download a SQL tool to see a GUI of your database (e.g. SQLite sorcerer)
51
56
 
57
+ == Examples
58
+
59
+ We have included two example queries that you may find useful:
60
+
61
+ * Example1: This script queries the database and selects all genes except the phage genes. The output is a FASTA file of the genes. This is a way of removing a set of genes that are not needed for the SNP analysis. You may use this script to do other SQL queries that result in a FASTA output.
62
+
63
+ Usage:
64
+
65
+ ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta
66
+
67
+ * Example2: This script queries the database and selects the number of unique SNPs within the list of the strains/samples provided. The output is the number of unique SNPs.
68
+
69
+ Usage:
70
+
71
+ ruby example2.rb -d your_db_name.sqlite3 -s list_of_your_species.txt
72
+
73
+
74
+ == Contact
75
+
76
+ If you have any comments, questions or suggestions, please email
77
+ ali.al-shahib@hpa.org.uk
78
+ or
79
+ anthony.underwood@hpa.org.uk
80
+
52
81
  Have fun snp-searching!
53
82
 
54
83
  == Copyright
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.17.0
1
+ 0.19.0
@@ -2,20 +2,20 @@ require 'snp-search'
2
2
  require 'snp_db_connection'
3
3
  require 'snp_db_models'
4
4
  require 'snp_db_schema'
5
- require 'slop'
6
- gem "slop", "~> 2.4.0"
7
5
 
6
+ gem "slop", "~> 2.4.0"
7
+ require 'slop'
8
8
 
9
9
  opts = Slop.new :help do
10
10
  banner "ruby snp-search [OPTIONS]"
11
11
 
12
12
  on :V, :verbose, 'Enable verbose mode'
13
- on :n, :name=, 'Name of database', :default => 'snp_db.sqlite3'
14
- on :r, :reference_file=, 'Reference genome file, in gbk or embl file format', true
15
- on :v, :vcf_file=, '.vcf file', true
16
- on :s, :strain=, 'text file with a list of strains/samples', true
17
- on :c, :cuttoff_snp=, 'SNP quality cutoff', :default => 90
18
- on :t, :cuttoff_genotype=, 'Genotype quality cutoff', :default => 30
13
+ on :n, :name=, 'Name of database, (default: snp_db.sqlite3)', :default => 'snp_db.sqlite3'
14
+ on :r, :reference_file=, 'Reference genome file, in gbk or embl file format, Required', true
15
+ on :v, :vcf_file=, '.vcf file, Required', true
16
+ on :s, :strain=, 'text file with a list of strains/samples, Required', true
17
+ on :c, :cuttoff_snp=, 'SNP quality cutoff, (default = 90)', :default => 90
18
+ on :t, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :default => 30
19
19
 
20
20
  on_empty do
21
21
  puts help
@@ -23,6 +23,25 @@ opts = Slop.new :help do
23
23
  end
24
24
  opts.parse
25
25
 
26
+ puts "You must supply the -r option, it's a required field" and exit unless opts[:reference_file]
27
+ puts "You must supply the -v option, it's a required field" and exit unless opts[:vcf_file]
28
+ puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
29
+
30
+ begin
31
+ puts "#{opts[:reference_file]} file does not exist!" and exit unless File.exist?(opts[:reference_file])
32
+ rescue
33
+ end
34
+
35
+ begin
36
+ puts "#{opts[:vcf_file]} file does not exist!" and exit unless File.exist?(opts[:vcf_file])
37
+ rescue
38
+ end
39
+
40
+ begin
41
+ puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
42
+ rescue
43
+ end
44
+
26
45
 
27
46
  begin
28
47
  strains = []
@@ -0,0 +1,92 @@
1
+ # This query script selects the SNPs of all genes except the 'phage' genes and writes them to a FASTA file (the database itself is not modified).
2
+ # Only use this script once your database has been fully populated.
3
+ # Usage: ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta
4
+ # You may use this script to do other SQL queries that result in a FASTA output. Just replace the 'snps' SQL query below with your own query.
5
+ require 'snp_db_connection'
6
+ require 'snp_db_models'
7
+ require 'snp_db_schema'
8
+ gem "slop", "~> 2.4.0"
9
+ require 'slop'
10
+
11
+ opts = Slop.new :help do
12
+ banner "ruby query.rb [OPTIONS]"
13
+
14
+ on :V, :verbose, 'Enable verbose mode'
15
+ on :d, :database=, 'The name of the database you like to query', true
16
+ on :o, :outfile=, 'output file, in fasta format', true
17
+ on :s, :strain=, 'The strains/samples you like to query', true
18
+
19
+ on_empty do
20
+ puts help
21
+ end
22
+ end
23
+ opts.parse
24
+
25
+ puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
26
+ puts "You must supply the -d option, it's a required field" and exit unless opts[:database]
27
+
28
+ begin
29
+ puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database])
30
+ rescue
31
+ end
32
+
33
+ begin
34
+ puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
35
+ rescue
36
+ end
37
+
38
+ establish_connection(opts[:database])
39
+
40
+ begin
41
+ strains = []
42
+ File.read(opts[:strain]).each_line do |line|
43
+ strains << line.chop
44
+ end
45
+
46
+ # Enter the name of your database
47
+
48
+ outfile = File.open(opts[:outfile], "w")
49
+
50
+
51
+ # create a sequence hash
52
+ sequence_hash = Hash.new
53
+
54
+ # create an array of strains
55
+
56
+ # hash key is strain_name, loop through strain_names
57
+ # create an empty array
58
+ strains.each do |strain_name|
59
+ sequence_hash[strain_name] = Array.new
60
+ end
61
+
62
+ snps = Snp.find_by_sql("SELECT snps.* FROM snps
63
+ INNER JOIN features
64
+ ON features.id = snps.feature_id
65
+ WHERE features.id IN
66
+ (select features.id from features
67
+ WHERE id NOT IN
68
+ (select distinct features.id FROM features
69
+ INNER JOIN annotations ON
70
+ annotations.feature_id = features.id
71
+ WHERE annotations.value LIKE '%phage%'))")
72
+
73
+
74
+ #puts snps.size
75
+ puts "Your Query is submitted and is being processed......."
76
+ snps.each do |snp|
77
+ #break if i == 100
78
+ snp.alleles.each do |allele|
79
+ allele.genotypes.each do |genotype|
80
+ # puts genotype.inspect
81
+ sequence_hash[genotype.strain.name] << allele.base
82
+ end
83
+ end
84
+ end
85
+
86
+ strains.each do |sn|
87
+ outfile.print ">#{sn}\n" , sequence_hash[sn].join("")
88
+ outfile.puts
89
+ end
90
+
91
+ rescue
92
+ end
@@ -0,0 +1,61 @@
1
+ # This query script finds the unique SNPs among the list of strains provided.
2
+ # Only use this script once your database has been fully populated.
3
+ # Usage: ruby example2.rb -d your_db_name.sqlite3 -s list_of_your_species.txt
4
+ # Output is the number of unique snps in the list of your strains provided in the -s option.
5
+ # You may use this script to do other SQL queries. Just replace the SQL query below with your own query.
6
+
7
+ require 'snp_db_models'
8
+ gem "slop", "~> 2.4.0"
9
+ require 'slop'
10
+
11
+ opts = Slop.new :help do
12
+ banner "ruby query.rb [OPTIONS]"
13
+
14
+ on :V, :verbose, 'Enable verbose mode'
15
+ on :d, :database=, 'The name of the database you like to query', true
16
+ on :s, :strain=, 'The strains/samples you like to query', true
17
+
18
+ on_empty do
19
+ puts help
20
+ end
21
+ end
22
+ opts.parse
23
+
24
+ puts "You must supply the -d option, it's a required field" and exit unless opts[:database]
25
+ puts "You must supply the -s option, it's a required field" and exit unless opts[:strain]
26
+
27
+ begin
28
+ puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database])
29
+ rescue
30
+ end
31
+
32
+ begin
33
+ puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
34
+ rescue
35
+ end
36
+
37
+
38
+ establish_connection(opts[:database])
39
+
40
+ begin
41
+ strains = []
42
+ File.read(opts[:strain]).each_line do |line|
43
+ strains << line.chop
44
+ end
45
+
46
+ def find_shared_snps(strain_names)
47
+ *strain_names = strain_names
48
+
49
+ where_statement = strain_names.collect{|strain_name| "strains.name = '#{strain_name}' OR "}.join("").sub(/ OR $/, "")
50
+
51
+ return Snp.find_by_sql("SELECT * FROM (SELECT features.* from features INNER JOIN snps ON features.id = snps.feature_id INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id WHERE (#{where_statement}) AND alleles.id <> snps.reference_allele_id AND (SELECT COUNT(*) from snps AS s INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id WHERE alleles.id <> snps.reference_allele_id and s.id = snps.id) = #{strain_names.size} GROUP BY snps.id HAVING COUNT(*) = #{strain_names.size})");
52
+ end
53
+
54
+ gas_snps = find_shared_snps(strains)
55
+
56
+ gas_snps.each do |snp|
57
+ puts "The number of unique snps are #{snp.id}"
58
+ end
59
+
60
+ rescue
61
+ end
@@ -1,5 +1,7 @@
1
1
  gem 'activerecord', "~> 3.1.3"
2
2
  require 'active_record'
3
+ gem 'sqlite3', "~> 1.3.4"
4
+ require 'sqlite3'
3
5
  def establish_connection(db_location)
4
6
  ActiveRecord::Base.establish_connection(
5
7
  :adapter => "sqlite3",
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "snp-search"
8
- s.version = "0.17.0"
8
+ s.version = "0.19.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ali Al-Shahib", "Anthony Underwood"]
12
- s.date = "2011-12-01"
12
+ s.date = "2011-12-06"
13
13
  s.description = "Use the snp-search toolset to query the SNP database"
14
14
  s.email = "ali.al-shahib@hpa.org.uk"
15
15
  s.executables = ["snp-search"]
@@ -28,6 +28,8 @@ Gem::Specification.new do |s|
28
28
  "Rakefile",
29
29
  "VERSION",
30
30
  "bin/snp-search",
31
+ "examples/example1.rb",
32
+ "examples/example2.rb",
31
33
  "lib/snp-search.rb",
32
34
  "lib/snp_db_connection.rb",
33
35
  "lib/snp_db_models.rb",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snp-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.19.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2011-12-01 00:00:00.000000000Z
13
+ date: 2011-12-06 00:00:00.000000000Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
17
- requirement: &2158879840 !ruby/object:Gem::Requirement
17
+ requirement: &2158876860 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 3.1.3
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2158879840
25
+ version_requirements: *2158876860
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: bio
28
- requirement: &2158879360 !ruby/object:Gem::Requirement
28
+ requirement: &2158876360 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.4.2
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2158879360
36
+ version_requirements: *2158876360
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: slop
39
- requirement: &2158878880 !ruby/object:Gem::Requirement
39
+ requirement: &2158875880 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 2.4.0
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *2158878880
47
+ version_requirements: *2158875880
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &2158878380 !ruby/object:Gem::Requirement
50
+ requirement: &2158875340 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.3.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *2158878380
58
+ version_requirements: *2158875340
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: bundler
61
- requirement: &2158877900 !ruby/object:Gem::Requirement
61
+ requirement: &2158874760 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: 1.0.0
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *2158877900
69
+ version_requirements: *2158874760
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: jeweler
72
- requirement: &2158877420 !ruby/object:Gem::Requirement
72
+ requirement: &2158874240 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ~>
@@ -77,10 +77,10 @@ dependencies:
77
77
  version: 1.6.4
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *2158877420
80
+ version_requirements: *2158874240
81
81
  - !ruby/object:Gem::Dependency
82
82
  name: rcov
83
- requirement: &2158876940 !ruby/object:Gem::Requirement
83
+ requirement: &2158873720 !ruby/object:Gem::Requirement
84
84
  none: false
85
85
  requirements:
86
86
  - - ! '>='
@@ -88,7 +88,7 @@ dependencies:
88
88
  version: '0'
89
89
  type: :development
90
90
  prerelease: false
91
- version_requirements: *2158876940
91
+ version_requirements: *2158873720
92
92
  description: Use the snp-search toolset to query the SNP database
93
93
  email: ali.al-shahib@hpa.org.uk
94
94
  executables:
@@ -108,6 +108,8 @@ files:
108
108
  - Rakefile
109
109
  - VERSION
110
110
  - bin/snp-search
111
+ - examples/example1.rb
112
+ - examples/example2.rb
111
113
  - lib/snp-search.rb
112
114
  - lib/snp_db_connection.rb
113
115
  - lib/snp_db_models.rb
@@ -130,7 +132,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
130
132
  version: '0'
131
133
  segments:
132
134
  - 0
133
- hash: -3544797148911864795
135
+ hash: 339676868583420236
134
136
  required_rubygems_version: !ruby/object:Gem::Requirement
135
137
  none: false
136
138
  requirements: