bio-phyta 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
# Fetch gems over TLS rather than plain HTTP.
source "https://rubygems.org"

# MRI only for now

# Runtime dependencies
gem "bio", ">= 1.4.2"
gem "mysql", ">= 2.8.1"
# For JRuby: gem "mysql", "~> 2.8.1"
gem "sequel", ">= 3.28.0"
gem "fastercsv", ">= 1.5.4" # only for 1.8.7
gem "nokogiri", ">= 1.5.0"
gem "trollop", ">= 1.16.2"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Philipp Comans
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = bio-phyta
2
+
3
+ Pipeline to remove contaminations from EST libraries.
4
+
5
+ == Contributing to bio-phyta
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 Philipp Comans. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,55 @@
1
# encoding: utf-8

# Rakefile for bio-phyta: gem packaging via Jeweler, unit tests, rcov
# coverage and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-phyta"
  gem.homepage = "http://github.com/pcomans/bioruby-phyta"
  # Must agree with LICENSE.txt, which contains the MIT license text.
  gem.license = "MIT"
  gem.summary = "Pipeline to remove contaminations from EST libraries"
  gem.description = "Coming soon"
  gem.email = "philipp.comans@googlemail.com"
  gem.authors = ["Philipp Comans"]
  # Remove test data from the gem
  gem.files.exclude "test/data/**/*"
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

# Newer rake releases no longer ship rake/rdoctask; prefer the task bundled
# with RDoc and only fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-phyta #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
@@ -0,0 +1,175 @@
1
#!/usr/bin/env ruby

# phyta-assign: reads a BLAST+ report in XML format and writes one CSV row per
# hit, annotated with the first entry of the taxonomic filter list (the
# "kingdom") found among the ancestors of the hit's species.
#
# Kingdom column values written by this script:
#   <filter name>  an ancestor of the species is in the filter list
#   "NONE"         the lineage reached the root without matching the filter
#   "NOT FOUND"    the species could not be resolved in the taxonomy database

require 'rubygems'
require 'trollop'

SCRIPT_NAME = "phyta-assign"

#parse command line arguments
opts = Trollop::options do
  opt :input_file, "The output of the BLASTplus alignment in XML format", :type => String
  opt :output_file, "The name of the output table in CSV format", :type => String
  opt :database_server, "Optional: The address of the MySQL database server", :type => String, :default => "localhost"
  opt :database_user, "Optional: The name of the database user", :type => String, :default => "root", :short => "-u"
  opt :database_password, "Optional: The password of the database user", :type => String, :default => "no password", :short => "-p"
  opt :database_name, "Optional: The name of the NCBI taxonomy database", :type => String, :default => "kingdom_assignment_taxonomy", :short => "-n"
end

unless opts[:input_file_given] && opts[:output_file_given]
  puts "Invalid arguments, see --help for more information."
  abort
end

# The option default ("no password") is only a placeholder for the help text;
# connect without a password unless one was given explicitly.
opts[:database_password] = nil unless opts[:database_password_given]

#Use the correct database connector
if RUBY_PLATFORM =~ /java/
  puts "You are running JRuby, the jdbc/mysql database connector will be used."
  require 'jdbc/mysql'
else
  require 'mysql'
end

require 'sequel'
require 'nokogiri'
require 'bio'

require 'csv'

# Ruby 1.8 ships the old CSV implementation (recognisable by CSV::Reader);
# use FasterCSV there, the standard library everywhere else.
if CSV.const_defined? :Reader
  require 'fastercsv'
  INSTALLED_CSV = FasterCSV
else
  INSTALLED_CSV = CSV
end

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

require 'kingdom_db'
require 'blast_string_parser'

rootpath = File.dirname(File.dirname(__FILE__))
PHYTA_VERSION = File.read(File.join(rootpath, 'VERSION')).chomp
puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"

# Never echo the database password to the terminal or into captured logs.
printable_opts = opts.merge(:database_password => (opts[:database_password] ? "[FILTERED]" : nil))
puts "Settings: " + printable_opts.inspect

#Validate the file arguments before touching the database

unless File.exist?(opts[:input_file])
  puts "The input file at " + File.expand_path(opts[:input_file]) + " could not be opened!"
  exit 1
end

if File.exist?(opts[:output_file])
  puts "The output file at " + File.expand_path(opts[:output_file]) + " already exists!"
  exit 1
end

#Initialize auxiliary classes

blast_parser = BlastStringParser.new

#Initialize database
db = KingdomDB.new(opts[:database_server], opts[:database_user], opts[:database_password], opts[:database_name])

filter_array = [
  "Bacteria",
  "Archaea",
  "Viridiplantae",
  "Rhodophyta",
  "Glaucocystophyceae",
  "Alveolata",
  "Cryptophyta",
  "stramenopiles", #<- Change
  "Amoebozoa",
  "Apusozoa",
  "Euglenozoa",
  "Fornicata",
  "Haptophyceae",
  "Heterolobosea",
  "Jakobida",
  "Katablepharidophyta",
  "Malawimonadidae",
  "Nucleariidae",
  "Oxymonadida",
  "Parabasalia",
  "Rhizaria",
  "unclassified eukaryotes",
  "Fungi",
  "Metazoa",
  "Choanoflagellida",
  "Opisthokonta incertae sedis", #"Fungi/Metazoa incertae sedis"
  "Viruses"
]

# Resolve the filter names first: if this fails, no empty output file is left
# behind to block the next run with "already exists".
filter_hash = db.get_filter(filter_array)

file = File.new(opts[:input_file])
output = nil

begin
  reader = Nokogiri::XML::Reader(file)

  output = INSTALLED_CSV.open(opts[:output_file], "w", {
    :col_sep => ";",
    :headers => ["query sequence id", "hit accession number", "sgi", "evalue", "species", "subject annotation", "subject score", "kingdom"],
    :write_headers => true})

  # Query definition of the iteration currently being read; every following
  # <Hit> belongs to it until the next <Iteration_query-def> is seen.
  current_query = ""

  #Go through the XML with a pull-parser
  reader.each do |elem|
    # Closing tags report the same name; only opening elements are relevant.
    next unless elem.node_type == Nokogiri::XML::Node::ELEMENT_NODE

    if elem.name == "Iteration_query-def"
      #We are at the beginning of an iteration
      current_query = elem.inner_xml

    elsif elem.name == "Hit"
      #We are at the beginning of a Hit
      #Load the node representing this hit into memory and extract required information
      hit = Nokogiri::XML(elem.outer_xml)
      hit_id = hit.xpath("//Hit_id").inner_text
      hit_def = hit.xpath("//Hit_def").inner_text
      hit_accession = hit.xpath("//Hit_accession").inner_text
      hsp_evalue = hit.xpath("//Hsp[1]/Hsp_evalue").inner_text.to_f #XPath positions start at 1
      subject_score = hit.xpath("//Hsp[1]/Hsp_bit-score").inner_text.to_f #bit-score of the first HSP
      subject_gi = blast_parser.get_sgi_info(hit_id)

      # Prefer the species name from the hit definition; fall back to a
      # lookup by gi number in the taxonomy database.
      species_name = nil
      begin
        species_name = blast_parser.get_species_name(hit_def)
      rescue RuntimeError
        puts "Can not find " + hit_def.to_s[0..20] + "..."
        begin
          species_name = db.name_from_gi(subject_gi)
        rescue RuntimeError
          puts "ERROR: Could not find gi " + subject_gi.to_s
        end
      end

      kingdom = begin
        db.match_filter(species_name, filter_hash) || "NONE"
      rescue RuntimeError
        "NOT FOUND"
      end

      output << [blast_parser.get_query_seq(current_query), hit_accession, subject_gi, hsp_evalue, species_name, blast_parser.get_subject_annotation(hit_def), subject_score, kingdom]
    end
  end
ensure
  # Flush and release both files even when parsing aborts half-way.
  output.close if output
  file.close
end

puts "Parsing finished!"
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ SCRIPT_NAME = "phyta-extract"
4
+
5
# Collects the values of one column into a Set, insisting that every row has
# a value and that no value occurs twice.
#
# table  - any object whose #each yields rows that respond to #[] (e.g. an
#          open CSV table read with headers).
# header - the key of the column to collect.
#
# Returns a Set with one entry per row.
# Raises RuntimeError if a row has no value for header or a value repeats.
def table_to_set(table, header)
  result = Set.new
  table.each do |current_row|
    current = current_row[header]
    if current.nil?
      raise "Error: no entry found for header " + header.to_s + " at " + current_row.inspect
    end

    # Set#add? returns nil when the element is already present.
    raise "Error: duplicate entry for " + current.to_s unless result.add?(current)
  end
  result
end
21
+
22
#parse command line arguments
unless ARGV.size == 5
  puts "Usage: #{SCRIPT_NAME} sequences.fasta clean.csv contaminated.csv clean_output.fasta contaminated_output.fasta"
  exit 1
end

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

require 'rubygems'
require 'csv'
require 'set'
require 'bio'

rootpath = File.dirname(File.dirname(__FILE__))
PHYTA_VERSION = File.read(File.join(rootpath, 'VERSION')).chomp
puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"

# Positional arguments, in the order shown in the usage line.
settings = {}
[:input_fasta, :input_clean, :input_contaminated, :output_clean, :output_contaminated].each do |key|
  settings[key] = ARGV.shift
end

# All inputs must exist ...
[:input_fasta, :input_clean, :input_contaminated].each do |key|
  next if File.exist?(settings[key])
  puts "The input file at " + File.expand_path(settings[key]) + " could not be opened!"
  exit 1
end

# ... and no output may be overwritten.
[:output_clean, :output_contaminated].each do |key|
  next unless File.exist?(settings[key])
  puts "The output file at " + File.expand_path(settings[key]) + " already exists!"
  exit 1
end

#CSV backwards compatibility
if CSV.const_defined? :Reader
  require 'fastercsv'
  INSTALLED_CSV = FasterCSV
else
  INSTALLED_CSV = CSV
end

# Reads the query sequence ids from one of the phyta-split CSV files and makes
# sure the file handle is released even if the table is malformed.
read_sequence_ids = lambda do |path|
  table = INSTALLED_CSV.open(path, "r", { :col_sep => ";", :headers => :first_row, :header_converters => :symbol})
  begin
    table_to_set(table, :query_sequence_id)
  ensure
    table.close
  end
end

#Open output of Kingdom-Splitter, save clean and contaminated sequence ids in two sets
puts "Reading clean..."
clean = read_sequence_ids.call(settings[:input_clean])

puts "Reading contaminated..."
contaminated = read_sequence_ids.call(settings[:input_contaminated])

puts "Extracting FASTA sequences..."
QUERY_SEQ_REGEXP = /\A(\S+)\s.*\z/ #Make sure this is exactly the same as in BlastStringParser in Kingdom-Assignment

sequences = nil
clean_out = nil
contaminated_out = nil

begin
  #Initialize output files
  clean_out = File.open(settings[:output_clean], "w")
  contaminated_out = File.open(settings[:output_contaminated], "w")

  sequences = Bio::FastaFormat.open(settings[:input_fasta])
  sequences.each do |entry|
    # A definition without whitespace is the id itself. Falling back to the
    # whole definition mirrors BlastStringParser#get_query_seq and avoids
    # calling [] on a nil match.
    match = QUERY_SEQ_REGEXP.match(entry.definition)
    current = match ? match[1] : entry.definition

    if clean.include?(current)
      #Sequence belongs in the clean set
      clean_out.write(entry)
    elsif contaminated.include?(current)
      #Sequence belongs in the contaminated set
      contaminated_out.write(entry)
    end
    #Sequences in neither set are not annotated and are skipped
  end
ensure
  [sequences, clean_out, contaminated_out].each { |io| io.close if io }
end

puts "Done!"
@@ -0,0 +1,131 @@
1
#!/usr/bin/env ruby

# phyta-setup: downloads the NCBI taxonomy dump and the protein gi -> taxon id
# mapping and loads them into the MySQL database used by phyta-assign.
# Tables named proteinGiToTaxonId, names and nodes are dropped and recreated.

require 'rubygems'
require 'net/ftp'
require 'tmpdir'

require 'trollop'
require 'sequel'

#parse command line arguments
opts = Trollop::options do
  opt :database_server, "Optional: The address of the MySQL database server", :type => String, :default => "localhost"
  opt :database_user, "Optional: The name of the database user", :type => String, :default => "root", :short => "-u"
  opt :database_password, "Optional: The password of the database user", :type => String, :default => "no password", :short => "-p"
  opt :database_name, "Optional: The name of the NCBI taxonomy database", :type => String, :default => "kingdom_assignment_taxonomy", :short => "-n"
end

# The option default ("no password") is only a placeholder for the help text.
opts[:database_password] = nil unless opts[:database_password_given]

#Connect to the target database
connect_string = 'mysql://' + opts[:database_server] + '/' + opts[:database_name] + '?user=' + opts[:database_user]
connect_string += '&password=' + opts[:database_password] unless opts[:database_password].nil?

if RUBY_PLATFORM =~ /java/
  #This is JRuby, using jdbc
  require 'jdbc/mysql'
  connect_string = 'jdbc:' + connect_string
else
  require 'mysql'
end

PROTEIN_TABLE_NAME = 'proteinGiToTaxonId'
NAMES_TABLE_NAME = 'names'
NODES_TABLE_NAME = 'nodes'

database = Sequel.connect(connect_string)

#Test the database connection
#Better fail now than after downloading all that stuff from the NCBI webservers
begin
  database.run "SHOW TABLES"
rescue Sequel::DatabaseConnectionError => e
  abort "Could not connect to database: #{e.message}"
end

#Do the following in a temporary directory, automatically delete it afterwards
Dir.mktmpdir do |dir|
  # Block form: the previous working directory is restored before the
  # temporary directory is removed.
  Dir.chdir(dir) do
    tax_dmp = 'taxdump.tar.gz'
    prot_dmp = 'gi_taxid_prot.dmp.gz'

    #Connect to the NCBI taxonomy db
    ftp = Net::FTP.new('ftp.ncbi.nih.gov')
    begin
      ftp.login
      ftp.chdir('pub/taxonomy/')

      puts "Downloading #{tax_dmp}... "
      ftp.getbinaryfile(tax_dmp, tax_dmp)
      # Stored locally as taxdump.tar.gz.md5; gettextfile itself returns nil.
      ftp.gettextfile(tax_dmp + ".md5")
      #TODO validate checksum

      puts "Downloading #{prot_dmp}... "
      ftp.getbinaryfile(prot_dmp, prot_dmp)
    ensure
      # Release the connection as soon as the downloads are done (or failed).
      ftp.close
    end

    puts "Extracting files..."
    # Stop here if an archive is broken instead of loading partial data.
    abort "Could not extract #{tax_dmp}" unless system("tar", "-xzf", tax_dmp)
    abort "Could not extract #{prot_dmp}" unless system("gunzip", prot_dmp)

    # The following is taken from
    # http://bergelson.uchicago.edu/Members/mhorton/taxonomydb.build

    puts "Populating database tables..."
    database.drop_table(PROTEIN_TABLE_NAME) if database.table_exists?(PROTEIN_TABLE_NAME)

    database.run "CREATE TABLE #{PROTEIN_TABLE_NAME} (
      gi INT UNSIGNED NOT NULL,
      taxonid INT UNSIGNED NOT NULL,

      PRIMARY KEY(gi)
    ) engine=innodb charset=utf8;"

    database.drop_table(NAMES_TABLE_NAME) if database.table_exists?(NAMES_TABLE_NAME)

    database.run "CREATE TABLE #{NAMES_TABLE_NAME} (
      taxonid MEDIUMINT(11) UNSIGNED NOT NULL,
      name VARCHAR(200) NOT NULL,
      uniquename VARCHAR(100) DEFAULT NULL,
      class VARCHAR(50) NOT NULL DEFAULT '',

      KEY taxonid (taxonid), KEY type (class), KEY name (name)
    ) ENGINE=INNODB CHARSET=UTF8;"

    database.drop_table(NODES_TABLE_NAME) if database.table_exists?(NODES_TABLE_NAME)

    database.run "CREATE TABLE #{NODES_TABLE_NAME} (
      taxonid mediumint(8) unsigned NOT NULL,
      parenttaxonid mediumint(8) unsigned NOT NULL,
      rank varchar(50) default NULL,
      embl_code varchar(20) default NULL,
      division_id smallint(6) NOT NULL,
      inherited_div_flag tinyint(1) unsigned NOT NULL,
      genetic_code_id smallint(6) NOT NULL,
      inherited_gc_flag tinyint(1) unsigned NOT NULL,
      mitochondrial_genetic_codeid smallint(6) NOT NULL,
      inherited_mgc_flag tinyint(1) unsigned NOT NULL,
      genbank_hidden_flag tinyint(1) unsigned NOT NULL,
      hidden_subtree_root_flag tinyint(1) unsigned NOT NULL,
      comments varchar(255) default NULL,

      PRIMARY KEY (taxonid), KEY parenttaxonid (parenttaxonid)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8"

    database.run "TRUNCATE #{NAMES_TABLE_NAME}"
    database.run "TRUNCATE #{NODES_TABLE_NAME}"
    database.run "TRUNCATE #{PROTEIN_TABLE_NAME}"

    # NOTE(review): LOAD DATA INFILE (without LOCAL) is given a path inside
    # the temporary directory, so this presumably requires the MySQL server
    # to be able to read files on this machine - confirm for remote servers.
    database.run "LOAD DATA INFILE '#{dir}/gi_taxid_prot.dmp' INTO TABLE #{PROTEIN_TABLE_NAME} FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' (gi,taxonid);"

    database.run "LOAD DATA INFILE '#{dir}/names.dmp' INTO TABLE #{NAMES_TABLE_NAME} FIELDS TERMINATED BY '\t|\t' LINES TERMINATED BY '\t|\n' (taxonid, name, uniquename, class);"

    database.run "LOAD DATA INFILE '#{dir}/nodes.dmp' INTO TABLE #{NODES_TABLE_NAME} FIELDS TERMINATED BY '\t|\t' LINES TERMINATED BY '\t|\n' (taxonid, parenttaxonid,rank,embl_code,division_id,inherited_div_flag,genetic_code_id,inherited_gc_flag, mitochondrial_genetic_codeid,inherited_mgc_flag,genBank_hidden_flag,hidden_subtree_root_flag,comments);"
  end
end

puts "done!"
@@ -0,0 +1,158 @@
1
#!/usr/bin/env ruby

# phyta-split: splits the table written by phyta-assign into
# <input>_clean.csv and <input>_contaminated.csv, keeping one row per query
# sequence id.
#
# A sequence is contaminated only if all of its hits fall into the
# contaminated filter below; a single hit outside the filter makes it clean.
# In either file the stored row is the one with the lowest e-value seen for
# that sequence among the rows compared (see the loop below).

require 'rubygems'
require 'csv' #Will use FasterCSV on Ruby 1.8

SCRIPT_NAME = "phyta-split"

# $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

#CSV backwards compatibility
if CSV.const_defined? :Reader
  require 'fastercsv'
  INSTALLED_CSV = FasterCSV
else
  INSTALLED_CSV = CSV
end

rootpath = File.dirname(File.dirname(__FILE__))
PHYTA_VERSION = File.read(File.join(rootpath, 'VERSION')).chomp
puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"

unless ARGV.size == 1
  puts "Usage: #{SCRIPT_NAME} input.csv"
  puts "This will automatically create input_clean.csv and input_contaminated.csv in the same directory."
  exit 1
end

#Command line arguments
settings = {}
settings[:input_file] = ARGV.shift

#Set up output file
fullpath = File.expand_path(settings[:input_file])
suffix = File.extname(fullpath)
dirname = File.dirname(fullpath)
name = File.basename(fullpath, suffix)

settings[:contaminated_file] = File.join(dirname, name + "_contaminated.csv")
settings[:clean_file] = File.join(dirname, name + "_clean.csv")

csv_header = ["query sequence id", "hit accession number", "sgi", "evalue", "species", "subject annotation", "subject score", "kingdom"]

#Open input file
if !File.file?(settings[:input_file])
  puts "No input file at " + File.expand_path(settings[:input_file]) + "!"
  exit 1
end

clean_seqs = {}
contaminated_seqs = {}
contaminated_filter = [
  "Bacteria",
  "Archaea",
  "Viruses",
  "NONE"
  #TODO is this all?
]

warning = false

input = INSTALLED_CSV.open(settings[:input_file], "r", {
  :col_sep => ";",
  :headers => :first_row,
  :header_converters => :symbol})

begin
  input.each do |current_row|
    seqid = current_row[:query_sequence_id]
    kingdom = current_row[:kingdom]
    evalue = current_row[:evalue]

    # to_s: a missing e-value column must not crash the comma check
    if !warning && evalue.to_s.include?(',')
      puts "***************** Warning *****************"
      puts "This program will produce incorrect output"
      puts "if a comma is used as a decimal divider!"
      puts "*******************************************"
      warning = true
    end

    kingdom_is_in_contaminated = contaminated_filter.include?(kingdom)

    if clean_seqs.has_key?(seqid)
      #Already clean: only keep track of the row with the lower evalue
      clean_seqs[seqid] = current_row if clean_seqs[seqid][:evalue].to_f > evalue.to_f

    elsif contaminated_seqs.has_key?(seqid)
      previous = contaminated_seqs[seqid]
      if kingdom_is_in_contaminated
        #Still contaminated: keep the row with the lower evalue
        contaminated_seqs[seqid] = current_row if previous[:evalue].to_f > evalue.to_f
      else
        #One hit is not contaminated, move to clean seqs (again with the lower evalue)
        contaminated_seqs.delete(seqid)
        clean_seqs[seqid] = previous[:evalue].to_f >= evalue.to_f ? current_row : previous
      end

    elsif kingdom_is_in_contaminated
      #Seq is not yet in any of the lists
      contaminated_seqs[seqid] = current_row
    else
      clean_seqs[seqid] = current_row
    end
  end
ensure
  input.close
end

#make sure that the set of contaminated and clean seqs does not overlap
unless (clean_seqs.keys & contaminated_seqs.keys).empty?
  puts "Something went wrong!"
  exit 1
end

#Output
contaminated = nil
clean = nil

begin
  contaminated = INSTALLED_CSV.open(settings[:contaminated_file], "w", {
    :col_sep => ";",
    :headers => csv_header,
    :write_headers => true})

  clean = INSTALLED_CSV.open(settings[:clean_file], "w", {
    :col_sep => ";",
    :headers => csv_header,
    :write_headers => true})

  clean_seqs.each_value { |row| clean << row }
  contaminated_seqs.each_value { |row| contaminated << row }
ensure
  # Flush whatever was written even if one of the files failed half-way.
  clean.close if clean
  contaminated.close if contaminated
end
@@ -0,0 +1,51 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
# Extracts identifiers, species names and annotations from the free-text
# fields of a BLAST report (hit id, hit definition, query definition).
class BlastStringParser
  #Set up Regexps
  #SPECIES_REGEXP2 = /^.*\[(\w* \w*).*\].*$/ #captures the first two words in square brackets

  # Captures the text after the last "[" up to the last "]" on a line.
  SPECIES_REGEXP2 = /^.*\[(.*)\].*$/

  # Captures the digits of an id of the form "gi|<digits>|...".
  SGI_REGEXP = /^gi\|(\d+)\|.*$/
  #QUERY_SEQ_REGEXP = /^([a-zA-Z0-9]+)[_|\s].*$/ #This captures everything up to the 1st underscore
  QUERY_SEQ_REGEXP = /^(\S+)\s.*$/ #This captures everything until the first whitespace (more robust)
  #do not expect whitespace after the last | for robustness, strip later
  SUBJ_ANNOTATION_REGEXP = /(?:.*\|)*(.*)\[.*/ #TODO check if this REGEXP captures the right stuff

  # Returns the gi number (as a String) of a hit id such as "gi|1712|emb|...".
  # Raises RuntimeError for anything else, including nil.
  def get_sgi_info(a_hit_id)
    match = SGI_REGEXP.match(a_hit_id.to_s)
    # Interpolation keeps this a RuntimeError even for nil input, which is
    # the error class callers rescue.
    raise "Wrong hit id #{a_hit_id}" unless match
    match[1]
  end

  # Returns the bracketed species name of a hit definition.
  # Raises RuntimeError when the definition contains no "[...]" part.
  def get_species_name(a_hit_def)
    match = SPECIES_REGEXP2.match(a_hit_def.to_s)
    raise "No species info found!" unless match
    match[1]
  end

  # Returns the annotation text preceding the first "[" that follows the last
  # "|" separated prefix, stripped of surrounding whitespace. When the
  # definition cannot be parsed a notice is printed and the argument is
  # returned unchanged.
  def get_subject_annotation(a_hit_def)
    match = SUBJ_ANNOTATION_REGEXP.match(a_hit_def.to_s)
    return match[1].strip if match

    puts "Can not parse subject annotation " + a_hit_def.to_s[0..20] + "...\n"
    a_hit_def
  end

  # Returns everything up to the first whitespace of a query definition, or
  # the argument itself when it contains no whitespace to split on.
  def get_query_seq(a_query)
    match = QUERY_SEQ_REGEXP.match(a_query.to_s)
    match ? match[1] : a_query
  end
end
@@ -0,0 +1,140 @@
1
+ require 'sequel'
2
+
3
# Read access to the NCBI taxonomy tables (names, nodes, proteinGiToTaxonId)
# created by phyta-setup. Taxon ids are handled as Strings throughout.
class KingdomDB

  ROOT_ID = "1"
  SCIENTIFIC_NAME = "scientific name"

  # Opens the database connection.
  #
  # server, user, database - connection settings (Strings).
  # password               - the password, or nil to connect without one.
  def initialize(server, user, password, database)
    connect_string = 'mysql://' + server + '/' + database + '?user=' + user
    connect_string += '&password=' + password unless password.nil?

    # RUBY_ENGINE is not defined on Ruby 1.8.7; JRuby needs the jdbc prefix.
    if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
      connect_string = 'jdbc:' + connect_string
    end

    @database = Sequel.connect(connect_string)
    # taxon id => id at which an earlier match_filter walk for it ended
    @filter_hit_cache = {}
  end

  # Returns the taxon id (String) for a taxon name. When several rows carry
  # the name, only those classified as scientific name are considered.
  # Raises RuntimeError if no row or more than one row remains.
  def id_from_name(taxon_name)
    db_results = @database[:names].select(:taxonid, :class).filter(:name => taxon_name).all

    if db_results.size > 1
      #If we get more than one result, check if we got a scientific name
      db_results = db_results.select { |row| row[:class] == SCIENTIFIC_NAME }
    end

    raise("Results not unique: " + db_results.inspect) if db_results.size > 1
    raise("No results for taxon name " + taxon_name.to_s) if db_results.empty?

    db_results[0][:taxonid].to_s
  end

  # Returns the scientific name stored for a taxon id.
  # Raises RuntimeError unless exactly one such name exists.
  def name_from_id(taxon_id)
    names = @database[:names].filter(:taxonid => taxon_id.to_s, :class => SCIENTIFIC_NAME).map(:name)
    single_result(names, "taxon id " + taxon_id.to_s)
  end

  # Returns the id (String) of the parent node of a taxon id.
  # Raises RuntimeError unless exactly one node row exists.
  def parent_id_from_id(taxon_id)
    parents = @database[:nodes].filter(:taxonid => taxon_id.to_s).map(:parenttaxonid)
    single_result(parents, "taxon id " + taxon_id.to_s).to_s
  end

  # Returns the rank (String) stored for a taxon id.
  # Raises RuntimeError unless exactly one node row exists.
  def node_rank_from_id(taxon_id)
    ranks = @database[:nodes].filter(:taxonid => taxon_id.to_s).map(:rank)
    single_result(ranks, "taxon id " + taxon_id.to_s).to_s
  end

  # Returns the taxon id (String) a protein gi number is mapped to.
  # Raises RuntimeError unless exactly one mapping exists.
  def id_from_gi(gi_number)
    # Spelled exactly like the table phyta-setup creates; MySQL table names
    # are case-sensitive on case-sensitive file systems.
    taxon_ids = @database[:proteinGiToTaxonId].filter(:gi => gi_number).map(:taxonid)
    single_result(taxon_ids, "gi " + gi_number.to_s).to_s
  end

  # Returns the scientific name of the taxon a protein gi number is mapped to.
  def name_from_gi(gi_number)
    name_from_id(id_from_gi(gi_number))
  end

  # Resolves a list of taxon names into a Hash of name => taxon id (String).
  # Raises RuntimeError if a name cannot be resolved uniquely.
  def get_filter(name_array)
    Hash[name_array.collect { |taxon_name|
      [taxon_name, id_from_name(taxon_name)]
    }]
  end

  # Walks from a taxon towards the root of the taxonomy until an id contained
  # in filter_hash (as built by get_filter) is reached.
  #
  # Returns the scientific name of the matching taxon, or nil when the walk
  # ends at the root. Raises RuntimeError if the name or a node on the way
  # cannot be resolved.
  def match_filter(taxon_name, filter_hash)
    current_species_id = id_from_name(taxon_name)

    # ids visited during this walk; all of them end at the same place
    history = []

    while ((current_species_id.to_i > ROOT_ID.to_i) && (!filter_hash.has_value?(current_species_id)))
      if @filter_hit_cache.has_key? current_species_id
        #Cache hit: an earlier walk already went up from here
        current_species_id = @filter_hit_cache[current_species_id]
        break
      else
        parent_id = parent_id_from_id(current_species_id)
        history << current_species_id
        current_species_id = parent_id
      end
    end

    history.each { |visited_id| @filter_hit_cache[visited_id] = current_species_id }

    return nil if current_species_id == ROOT_ID
    name_from_id(current_species_id)
  end

  private

  # Returns the only element of values. description names what was looked up
  # and is used in the error raised when there is no element; more than one
  # element is reported as not unique.
  def single_result(values, description)
    raise("No results for " + description) if values.size == 0
    raise("Results not unique: " + values.inspect) if values.size > 1
    values[0]
  end
end
@@ -0,0 +1,17 @@
1
# Shared setup for the unit tests: activates the bundled gems, loads the test
# libraries and makes lib/ and the test directory requirable.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# Unshifted in this order so the test directory ends up in front of lib/.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each do |load_dir|
  $LOAD_PATH.unshift(load_dir)
end

# Reopened as an extension point for helpers shared by all test cases.
class Test::Unit::TestCase
end
@@ -0,0 +1,41 @@
1
+ require 'helper'
2
+ require 'tmpdir'
3
+
4
# End-to-end tests that run bin/phyta-assign as a subprocess. They have to be
# started from the project root and need the taxonomy database that the
# script connects to with its default settings.
class BlackBoxTest < Test::Unit::TestCase
  def test_without_parameters
    #This test does not make a whole lot of sense...
    result = %x[bin/phyta-assign]
    expected = "Invalid arguments, see --help for more information."
    assert_equal expected.strip, result.strip
  end

  def test_small
    assert_assignment_output("in_3.xml", "out_3.csv", "target_3.csv")
  end

  def test_medium
    assert_assignment_output("in_medium.xml", "out_medium.csv", "target_medium.csv")
  end

  private

  # Runs phyta-assign on test/data/<input_name>, writing to <output_name> in
  # a temporary directory, and compares the result with
  # test/data/<target_name>.
  def assert_assignment_output(input_name, output_name, target_name)
    Dir.mktmpdir do |dir|
      output_path = File.join(dir, output_name)
      %x[bin/phyta-assign -i test/data/#{input_name} -o #{output_path}]

      # File.read closes the file again, unlike File.open(...).read
      result = File.read(output_path)
      target = File.read(File.join("test", "data", target_name))

      # Plain boolean assert: a failure should not dump both files.
      assert(result == target, "Output of #{output_name} invalid")
    end
  end
end
41
+
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+ require 'blast_string_parser'
3
+
4
# Unit tests for the string extraction helpers of BlastStringParser.
class BlastStringParserTest < Test::Unit::TestCase
  def setup
    @bsp = BlastStringParser.new
  end

  def test_get_species_info
    assert_equal "Xenopus (Silurana) tropicalis", @bsp.get_species_name("PREDICTED: uncharacterized protein K02A2.6-like [Xenopus (Silurana) tropicalis]")
    assert_equal "Corticium_candelabrum", @bsp.get_species_name("CC1c114_molpal [Corticium_candelabrum]")
    assert_raise RuntimeError do
      @bsp.get_species_name("no species given")
    end
  end

  def test_get_query_seq
    assert_equal "Aqu1.200003", @bsp.get_query_seq("Aqu1.200003")
    assert_equal "AW3C1", @bsp.get_query_seq("AW3C1 [Astrosclera_willeyana]")
    assert_equal "AW3C1_molpal", @bsp.get_query_seq("AW3C1_molpal")
    assert_equal "CC1c1_molpal", @bsp.get_query_seq("CC1c1_molpal [Corticium_candelabrum]")
    # NOTE(review): this assertion was a verbatim copy of the one above; a
    # tab-separated variant is assumed to have been intended - confirm.
    assert_equal "CC1c1_molpal", @bsp.get_query_seq("CC1c1_molpal\t[Corticium_candelabrum]")
    assert_equal "CC1c1_molpal", @bsp.get_query_seq("CC1c1_molpal \n[Corticium_candelabrum]")
    assert_equal "CC1c1_molpal", @bsp.get_query_seq("CC1c1_molpal [Corticium_candelabrum], this is a nice_sequence I found rummaging through my fridge [an older model from AEG]")
    assert_equal "CC1c1", @bsp.get_query_seq("CC1c1 (tastes really good with curry)")
    assert_equal "CC1c1_molpal", @bsp.get_query_seq("CC1c1_molpal [Corticium_candelabrum] (oh, hai!)")
  end

  def test_get_sgi_info
    assert_equal "270016927", @bsp.get_sgi_info("gi|270016927|gb|EFA13373.1|")
    assert_raise RuntimeError do
      @bsp.get_sgi_info("ref|NP_000001.1|")
    end
  end

  def test_get_subject_annotation
    assert_equal "hypothetical protein", @bsp.get_subject_annotation("gi|1|ref|X| hypothetical protein [Homo sapiens]")
    # Unparseable definitions are handed back unchanged
    assert_equal "no brackets here", @bsp.get_subject_annotation("no brackets here")
  end
end
@@ -0,0 +1,118 @@
1
+ require 'helper'
2
+
3
+ require 'kingdom_db'
4
+
5
# Tests for the KingdomDB lookup methods (id_from_name, name_from_id,
# parent_id_from_id, node_rank_from_id, name_from_gi, get_filter and
# match_filter).
#
# NOTE(review): setup passes 'localhost', 'root', '' and
# 'kingdom_assignment_taxonomy' to KingdomDB.new -- presumably host, user,
# password and database name, so a populated local database is needed to run
# these tests. Confirm against KingdomDB.
class KingdomDbTest < Test::Unit::TestCase

  def setup
    @db = KingdomDB.new('localhost', 'root', '', 'kingdom_assignment_taxonomy')
  end

  # A known name yields an id; the three other names must raise RuntimeError.
  def test_id_from_taxon_name
    assert_not_nil(@db.id_from_name("Drosophila melanogaster"))

    ["Sarah palin", "", "Shewanella sp"].each do |name|
      assert_raise RuntimeError do
        @db.id_from_name(name)
      end
    end
  end

  # The id may be passed as returned by id_from_name, as a String or as an
  # Integer; the ids 0 and -1 must raise RuntimeError.
  def test_name_from_id
    homo = @db.id_from_name("Homo sapiens")
    assert_equal "Homo sapiens", @db.name_from_id(homo)
    assert_equal "Homo sapiens", @db.name_from_id(homo.to_s)
    assert_equal "Homo sapiens", @db.name_from_id(homo.to_i)

    [0, -1].each do |invalid_id|
      assert_raise RuntimeError do
        @db.name_from_id(invalid_id)
      end
    end
  end

  # The parent id is expected as a String for both String and Integer input.
  def test_parent_id_from_id
    assert_equal "7872", @db.parent_id_from_id("7873")
    assert_equal "7872", @db.parent_id_from_id(7873)
  end

  def test_node_rank_from_id
    assert_equal "species", @db.node_rank_from_id("7873")
    assert_equal "species", @db.node_rank_from_id(7873)
    assert_equal "species", @db.node_rank_from_id(@db.id_from_name("Drosophila melanogaster"))

    assert_equal "genus", @db.node_rank_from_id("7872")
    assert_equal "no rank", @db.node_rank_from_id(1)
    assert_equal "no rank", @db.node_rank_from_id(@db.id_from_name("Woodchuck hepatitis virus 1"))
  end

  # GI numbers may be given as Integer or String; GI 205688854 must raise.
  def test_name_from_gi
    assert_equal "Oryctolagus cuniculus", @db.name_from_gi(1712)
    assert_equal "Tribolium castaneum", @db.name_from_gi("270016927")
    assert_equal "Clypeaster japonicus", @db.name_from_gi(124106306)
    assert_equal "Anthocidaris crassispina", @db.name_from_gi(124106325)

    assert_equal "Lateolabrax japonicus", @db.name_from_gi(158518390)
    assert_raise RuntimeError do
      @db.name_from_gi(205688854)
    end
  end

  def test_match_filter
    filter_array = %w[Bacteria Archaea Metazoa Viruses]

    # get_filter must map every filter name to the id id_from_name reports.
    filter_hash = @db.get_filter(filter_array)
    filter_hash.each do |name, id|
      assert_equal @db.id_from_name(name), id
    end

    # The array handed to get_filter must come back unmodified.
    assert_equal ["Bacteria", "Archaea", "Metazoa", "Viruses"], filter_array

    assert_equal "Metazoa", @db.match_filter("Homo sapiens", filter_hash)
    assert_equal "Bacteria", @db.match_filter("Escherichia coli", filter_hash)
    # The call itself has to raise, so there is no return value to assert on.
    assert_raise RuntimeError do
      @db.match_filter("Hello world", filter_hash)
    end
    assert_equal "Bacteria", @db.match_filter("Bacteria", filter_hash)
    assert_nil @db.match_filter("root", filter_hash)
    assert_nil @db.match_filter("Zea mays", filter_hash)
    assert_nil @db.match_filter("cellular organisms", filter_hash)

    assert_equal "Bacteria", @db.match_filter("Shewanella sp.", filter_hash)
    ["Homo s", "sp"].each do |name|
      assert_raise RuntimeError do
        @db.match_filter(name, filter_hash)
      end
    end
    assert_equal "Metazoa", @db.match_filter("Homo", filter_hash)

    assert_equal "Viruses", @db.match_filter("Cyanophage Syn26", filter_hash)
    assert_equal "Viruses", @db.match_filter("uncultured phage", filter_hash)
    assert_equal "Bacteria", @db.match_filter("uncultured bacterium", filter_hash)
    assert_nil @db.match_filter("uncultured organism", filter_hash)
    assert_equal "Metazoa", @db.match_filter("Xenopus (Silurana) tropicalis", filter_hash)
    assert_equal "Viruses", @db.match_filter("Pseudomonas phage EL", filter_hash)
    # NOTE(review): identical to the assertion above as written -- confirm
    # whether a different spelling of the name was intended here.
    assert_equal "Viruses", @db.match_filter("Pseudomonas phage EL", filter_hash)

    assert_equal "Metazoa", @db.match_filter("Canis lupus familiaris", filter_hash)
    assert_equal "Metazoa", @db.match_filter("Canis familiaris", filter_hash)

    assert_raise RuntimeError do
      @db.match_filter(nil, filter_hash)
    end
  end
end
metadata ADDED
@@ -0,0 +1,239 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-phyta
3
+ version: !ruby/object:Gem::Version
4
+ hash: 59
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 9
9
+ - 0
10
+ version: 0.9.0
11
+ platform: ruby
12
+ authors:
13
+ - Philipp Comans
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-10-20 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ requirement: &id001 !ruby/object:Gem::Requirement
22
+ none: false
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ hash: 3
27
+ segments:
28
+ - 1
29
+ - 4
30
+ - 2
31
+ version: 1.4.2
32
+ version_requirements: *id001
33
+ name: bio
34
+ prerelease: false
35
+ type: :runtime
36
+ - !ruby/object:Gem::Dependency
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 45
43
+ segments:
44
+ - 2
45
+ - 8
46
+ - 1
47
+ version: 2.8.1
48
+ version_requirements: *id002
49
+ name: mysql
50
+ prerelease: false
51
+ type: :runtime
52
+ - !ruby/object:Gem::Dependency
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 119
59
+ segments:
60
+ - 3
61
+ - 28
62
+ - 0
63
+ version: 3.28.0
64
+ version_requirements: *id003
65
+ name: sequel
66
+ prerelease: false
67
+ type: :runtime
68
+ - !ruby/object:Gem::Dependency
69
+ requirement: &id004 !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 11
75
+ segments:
76
+ - 1
77
+ - 5
78
+ - 4
79
+ version: 1.5.4
80
+ version_requirements: *id004
81
+ name: fastercsv
82
+ prerelease: false
83
+ type: :runtime
84
+ - !ruby/object:Gem::Dependency
85
+ requirement: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ hash: 3
91
+ segments:
92
+ - 1
93
+ - 5
94
+ - 0
95
+ version: 1.5.0
96
+ version_requirements: *id005
97
+ name: nokogiri
98
+ prerelease: false
99
+ type: :runtime
100
+ - !ruby/object:Gem::Dependency
101
+ requirement: &id006 !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 83
107
+ segments:
108
+ - 1
109
+ - 16
110
+ - 2
111
+ version: 1.16.2
112
+ version_requirements: *id006
113
+ name: trollop
114
+ prerelease: false
115
+ type: :runtime
116
+ - !ruby/object:Gem::Dependency
117
+ requirement: &id007 !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ hash: 3
123
+ segments:
124
+ - 0
125
+ version: "0"
126
+ version_requirements: *id007
127
+ name: shoulda
128
+ prerelease: false
129
+ type: :development
130
+ - !ruby/object:Gem::Dependency
131
+ requirement: &id008 !ruby/object:Gem::Requirement
132
+ none: false
133
+ requirements:
134
+ - - ~>
135
+ - !ruby/object:Gem::Version
136
+ hash: 23
137
+ segments:
138
+ - 1
139
+ - 0
140
+ - 0
141
+ version: 1.0.0
142
+ version_requirements: *id008
143
+ name: bundler
144
+ prerelease: false
145
+ type: :development
146
+ - !ruby/object:Gem::Dependency
147
+ requirement: &id009 !ruby/object:Gem::Requirement
148
+ none: false
149
+ requirements:
150
+ - - ~>
151
+ - !ruby/object:Gem::Version
152
+ hash: 7
153
+ segments:
154
+ - 1
155
+ - 6
156
+ - 4
157
+ version: 1.6.4
158
+ version_requirements: *id009
159
+ name: jeweler
160
+ prerelease: false
161
+ type: :development
162
+ - !ruby/object:Gem::Dependency
163
+ requirement: &id010 !ruby/object:Gem::Requirement
164
+ none: false
165
+ requirements:
166
+ - - ">="
167
+ - !ruby/object:Gem::Version
168
+ hash: 3
169
+ segments:
170
+ - 0
171
+ version: "0"
172
+ version_requirements: *id010
173
+ name: rcov
174
+ prerelease: false
175
+ type: :development
176
+ description: Coming soon
177
+ email: philipp.comans@googlemail.com
178
+ executables:
179
+ - phyta-split
180
+ - phyta-assign
181
+ - phyta-extract
182
+ - phyta-setup-taxonomy-db
183
+ extensions: []
184
+
185
+ extra_rdoc_files:
186
+ - LICENSE.txt
187
+ - README.rdoc
188
+ files:
189
+ - .document
190
+ - Gemfile
191
+ - LICENSE.txt
192
+ - README.rdoc
193
+ - Rakefile
194
+ - VERSION
195
+ - bin/phyta-assign
196
+ - bin/phyta-extract
197
+ - bin/phyta-setup-taxonomy-db
198
+ - bin/phyta-split
199
+ - lib/blast_string_parser.rb
200
+ - lib/kingdom_db.rb
201
+ - test/helper.rb
202
+ - test/test_blackbox.rb
203
+ - test/test_blast_string_parser.rb
204
+ - test/test_kingdom_db.rb
205
+ homepage: http://github.com/pcomans/bioruby-phyta
206
+ licenses:
207
+ - LGPL
208
+ post_install_message:
209
+ rdoc_options: []
210
+
211
+ require_paths:
212
+ - lib
213
+ required_ruby_version: !ruby/object:Gem::Requirement
214
+ none: false
215
+ requirements:
216
+ - - ">="
217
+ - !ruby/object:Gem::Version
218
+ hash: 3
219
+ segments:
220
+ - 0
221
+ version: "0"
222
+ required_rubygems_version: !ruby/object:Gem::Requirement
223
+ none: false
224
+ requirements:
225
+ - - ">="
226
+ - !ruby/object:Gem::Version
227
+ hash: 3
228
+ segments:
229
+ - 0
230
+ version: "0"
231
+ requirements: []
232
+
233
+ rubyforge_project:
234
+ rubygems_version: 1.8.10
235
+ signing_key:
236
+ specification_version: 3
237
+ summary: Pipeline to remove contaminations from EST libraries
238
+ test_files: []
239
+