bio-phyta 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +165 -20
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bin/phyta-assign +22 -30
- data/bin/phyta-extract +33 -35
- data/bin/phyta-split +56 -28
- data/lib/kingdom_db.rb +7 -1
- data/test/test_blackbox_assign.rb +68 -0
- data/test/test_blackbox_extract.rb +58 -0
- data/test/test_blackbox_split.rb +116 -0
- metadata +109 -166
- data/test/test_blackbox.rb +0 -41
data/LICENSE.txt
CHANGED
@@ -1,20 +1,165 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
the
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
2
|
+
Version 3, 29 June 2007
|
3
|
+
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
6
|
+
of this license document, but changing it is not allowed.
|
7
|
+
|
8
|
+
|
9
|
+
This version of the GNU Lesser General Public License incorporates
|
10
|
+
the terms and conditions of version 3 of the GNU General Public
|
11
|
+
License, supplemented by the additional permissions listed below.
|
12
|
+
|
13
|
+
0. Additional Definitions.
|
14
|
+
|
15
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
16
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
17
|
+
General Public License.
|
18
|
+
|
19
|
+
"The Library" refers to a covered work governed by this License,
|
20
|
+
other than an Application or a Combined Work as defined below.
|
21
|
+
|
22
|
+
An "Application" is any work that makes use of an interface provided
|
23
|
+
by the Library, but which is not otherwise based on the Library.
|
24
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
25
|
+
of using an interface provided by the Library.
|
26
|
+
|
27
|
+
A "Combined Work" is a work produced by combining or linking an
|
28
|
+
Application with the Library. The particular version of the Library
|
29
|
+
with which the Combined Work was made is also called the "Linked
|
30
|
+
Version".
|
31
|
+
|
32
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
33
|
+
Corresponding Source for the Combined Work, excluding any source code
|
34
|
+
for portions of the Combined Work that, considered in isolation, are
|
35
|
+
based on the Application, and not on the Linked Version.
|
36
|
+
|
37
|
+
The "Corresponding Application Code" for a Combined Work means the
|
38
|
+
object code and/or source code for the Application, including any data
|
39
|
+
and utility programs needed for reproducing the Combined Work from the
|
40
|
+
Application, but excluding the System Libraries of the Combined Work.
|
41
|
+
|
42
|
+
1. Exception to Section 3 of the GNU GPL.
|
43
|
+
|
44
|
+
You may convey a covered work under sections 3 and 4 of this License
|
45
|
+
without being bound by section 3 of the GNU GPL.
|
46
|
+
|
47
|
+
2. Conveying Modified Versions.
|
48
|
+
|
49
|
+
If you modify a copy of the Library, and, in your modifications, a
|
50
|
+
facility refers to a function or data to be supplied by an Application
|
51
|
+
that uses the facility (other than as an argument passed when the
|
52
|
+
facility is invoked), then you may convey a copy of the modified
|
53
|
+
version:
|
54
|
+
|
55
|
+
a) under this License, provided that you make a good faith effort to
|
56
|
+
ensure that, in the event an Application does not supply the
|
57
|
+
function or data, the facility still operates, and performs
|
58
|
+
whatever part of its purpose remains meaningful, or
|
59
|
+
|
60
|
+
b) under the GNU GPL, with none of the additional permissions of
|
61
|
+
this License applicable to that copy.
|
62
|
+
|
63
|
+
3. Object Code Incorporating Material from Library Header Files.
|
64
|
+
|
65
|
+
The object code form of an Application may incorporate material from
|
66
|
+
a header file that is part of the Library. You may convey such object
|
67
|
+
code under terms of your choice, provided that, if the incorporated
|
68
|
+
material is not limited to numerical parameters, data structure
|
69
|
+
layouts and accessors, or small macros, inline functions and templates
|
70
|
+
(ten or fewer lines in length), you do both of the following:
|
71
|
+
|
72
|
+
a) Give prominent notice with each copy of the object code that the
|
73
|
+
Library is used in it and that the Library and its use are
|
74
|
+
covered by this License.
|
75
|
+
|
76
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
77
|
+
document.
|
78
|
+
|
79
|
+
4. Combined Works.
|
80
|
+
|
81
|
+
You may convey a Combined Work under terms of your choice that,
|
82
|
+
taken together, effectively do not restrict modification of the
|
83
|
+
portions of the Library contained in the Combined Work and reverse
|
84
|
+
engineering for debugging such modifications, if you also do each of
|
85
|
+
the following:
|
86
|
+
|
87
|
+
a) Give prominent notice with each copy of the Combined Work that
|
88
|
+
the Library is used in it and that the Library and its use are
|
89
|
+
covered by this License.
|
90
|
+
|
91
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
92
|
+
document.
|
93
|
+
|
94
|
+
c) For a Combined Work that displays copyright notices during
|
95
|
+
execution, include the copyright notice for the Library among
|
96
|
+
these notices, as well as a reference directing the user to the
|
97
|
+
copies of the GNU GPL and this license document.
|
98
|
+
|
99
|
+
d) Do one of the following:
|
100
|
+
|
101
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
102
|
+
License, and the Corresponding Application Code in a form
|
103
|
+
suitable for, and under terms that permit, the user to
|
104
|
+
recombine or relink the Application with a modified version of
|
105
|
+
the Linked Version to produce a modified Combined Work, in the
|
106
|
+
manner specified by section 6 of the GNU GPL for conveying
|
107
|
+
Corresponding Source.
|
108
|
+
|
109
|
+
1) Use a suitable shared library mechanism for linking with the
|
110
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
111
|
+
a copy of the Library already present on the user's computer
|
112
|
+
system, and (b) will operate properly with a modified version
|
113
|
+
of the Library that is interface-compatible with the Linked
|
114
|
+
Version.
|
115
|
+
|
116
|
+
e) Provide Installation Information, but only if you would otherwise
|
117
|
+
be required to provide such information under section 6 of the
|
118
|
+
GNU GPL, and only to the extent that such information is
|
119
|
+
necessary to install and execute a modified version of the
|
120
|
+
Combined Work produced by recombining or relinking the
|
121
|
+
Application with a modified version of the Linked Version. (If
|
122
|
+
you use option 4d0, the Installation Information must accompany
|
123
|
+
the Minimal Corresponding Source and Corresponding Application
|
124
|
+
Code. If you use option 4d1, you must provide the Installation
|
125
|
+
Information in the manner specified by section 6 of the GNU GPL
|
126
|
+
for conveying Corresponding Source.)
|
127
|
+
|
128
|
+
5. Combined Libraries.
|
129
|
+
|
130
|
+
You may place library facilities that are a work based on the
|
131
|
+
Library side by side in a single library together with other library
|
132
|
+
facilities that are not Applications and are not covered by this
|
133
|
+
License, and convey such a combined library under terms of your
|
134
|
+
choice, if you do both of the following:
|
135
|
+
|
136
|
+
a) Accompany the combined library with a copy of the same work based
|
137
|
+
on the Library, uncombined with any other library facilities,
|
138
|
+
conveyed under the terms of this License.
|
139
|
+
|
140
|
+
b) Give prominent notice with the combined library that part of it
|
141
|
+
is a work based on the Library, and explaining where to find the
|
142
|
+
accompanying uncombined form of the same work.
|
143
|
+
|
144
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
145
|
+
|
146
|
+
The Free Software Foundation may publish revised and/or new versions
|
147
|
+
of the GNU Lesser General Public License from time to time. Such new
|
148
|
+
versions will be similar in spirit to the present version, but may
|
149
|
+
differ in detail to address new problems or concerns.
|
150
|
+
|
151
|
+
Each version is given a distinguishing version number. If the
|
152
|
+
Library as you received it specifies that a certain numbered version
|
153
|
+
of the GNU Lesser General Public License "or any later version"
|
154
|
+
applies to it, you have the option of following the terms and
|
155
|
+
conditions either of that published version or of any later version
|
156
|
+
published by the Free Software Foundation. If the Library as you
|
157
|
+
received it does not specify a version number of the GNU Lesser
|
158
|
+
General Public License, you may choose any version of the GNU Lesser
|
159
|
+
General Public License ever published by the Free Software Foundation.
|
160
|
+
|
161
|
+
If the Library as you received it specifies that a proxy can decide
|
162
|
+
whether future versions of the GNU Lesser General Public License shall
|
163
|
+
apply, that proxy's public statement of acceptance of any version is
|
164
|
+
permanent authorization for you to choose that version for the
|
165
|
+
Library.
|
data/Rakefile
CHANGED
@@ -15,10 +15,10 @@ require 'jeweler'
|
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
17
|
gem.name = "bio-phyta"
|
18
|
-
gem.homepage = "
|
18
|
+
gem.homepage = "https://github.com/PalMuc/bio-phyta"
|
19
19
|
gem.license = "LGPL"
|
20
20
|
gem.summary = "Pipeline to remove contaminations from EST libraries"
|
21
|
-
gem.description = "
|
21
|
+
gem.description = "Pipeline to remove contaminations from EST libraries"
|
22
22
|
gem.email = "philipp.comans@googlemail.com"
|
23
23
|
gem.authors = ["Philipp Comans"]
|
24
24
|
# Remove test data from the gem
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.0
|
1
|
+
0.9.1
|
data/bin/phyta-assign
CHANGED
@@ -13,6 +13,7 @@ opts = Trollop::options do
|
|
13
13
|
opt :database_user, "Optional: The name of the database user", :type => String, :default => "root", :short => "-u"
|
14
14
|
opt :database_password, "Optional: The password of the database user", :type => String, :default => "no password", :short => "-p"
|
15
15
|
opt :database_name, "Optional: The name of the NCBI taxonomy database", :type => String, :default => "kingdom_assignment_taxonomy", :short => "-n"
|
16
|
+
opt :filter, "A file in YAML format containing a list of taxa to be considered contaminants", :type => String, :default => "Use builtin filter capturing Bacteria, Archaea, Viruses and NONE. To learn how to write your own filters, visit https://github.com/PalMuc/bio-phyta/wiki/Custom-filters ", :short => "-f"
|
16
17
|
end
|
17
18
|
|
18
19
|
unless opts[:input_file_given] && opts[:output_file_given]
|
@@ -35,6 +36,7 @@ end
|
|
35
36
|
require 'sequel'
|
36
37
|
require 'nokogiri'
|
37
38
|
require 'bio'
|
39
|
+
require 'yaml'
|
38
40
|
|
39
41
|
require 'csv'
|
40
42
|
|
@@ -57,6 +59,26 @@ puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"
|
|
57
59
|
|
58
60
|
puts "Settings: " + opts.inspect
|
59
61
|
|
62
|
+
filter_array = nil
|
63
|
+
|
64
|
+
if opts[:filter_given]
|
65
|
+
begin
|
66
|
+
filter_array = YAML::load(File.open(opts[:filter], 'r'))
|
67
|
+
rescue Exception => e
|
68
|
+
puts "Error: #{e.message}"
|
69
|
+
puts e.backtrace.join("\n")
|
70
|
+
puts "Please see https://github.com/PalMuc/bio-phyta/wiki/Custom-filters for instructions on how to write filters"
|
71
|
+
abort
|
72
|
+
end
|
73
|
+
|
74
|
+
unless filter_array.is_a? Array
|
75
|
+
puts "Error: Invalid filter format.\nPlease see https://github.com/PalMuc/bio-phyta/wiki/Custom-filters for instructions on how to write filters"
|
76
|
+
abort
|
77
|
+
end
|
78
|
+
else
|
79
|
+
filter_array = KingdomDB::DEFAULT_FILTER
|
80
|
+
end
|
81
|
+
|
60
82
|
#Initialize auxiliary classes
|
61
83
|
|
62
84
|
blast_parser = BlastStringParser.new()
|
@@ -85,36 +107,6 @@ output = INSTALLED_CSV.open(opts[:output_file], "w", {
|
|
85
107
|
:headers => ["query sequence id", "hit accession number", "sgi", "evalue", "species", "subject annotation", "subject score", "kingdom"],
|
86
108
|
:write_headers => true})
|
87
109
|
|
88
|
-
filter_array = [
|
89
|
-
"Bacteria",
|
90
|
-
"Archaea",
|
91
|
-
"Viridiplantae",
|
92
|
-
"Rhodophyta",
|
93
|
-
"Glaucocystophyceae",
|
94
|
-
"Alveolata",
|
95
|
-
"Cryptophyta",
|
96
|
-
"stramenopiles", #<- Change
|
97
|
-
"Amoebozoa",
|
98
|
-
"Apusozoa",
|
99
|
-
"Euglenozoa",
|
100
|
-
"Fornicata",
|
101
|
-
"Haptophyceae",
|
102
|
-
"Heterolobosea",
|
103
|
-
"Jakobida",
|
104
|
-
"Katablepharidophyta",
|
105
|
-
"Malawimonadidae",
|
106
|
-
"Nucleariidae",
|
107
|
-
"Oxymonadida",
|
108
|
-
"Parabasalia",
|
109
|
-
"Rhizaria",
|
110
|
-
"unclassified eukaryotes",
|
111
|
-
"Fungi",
|
112
|
-
"Metazoa",
|
113
|
-
"Choanoflagellida",
|
114
|
-
"Opisthokonta incertae sedis", #"Fungi/Metazoa incertae sedis"
|
115
|
-
"Viruses"
|
116
|
-
]
|
117
|
-
|
118
110
|
filter_hash = db.get_filter(filter_array)
|
119
111
|
|
120
112
|
current_query = ""
|
data/bin/phyta-extract
CHANGED
@@ -19,54 +19,52 @@ def table_to_set(table, header)
|
|
19
19
|
return result
|
20
20
|
end
|
21
21
|
|
22
|
+
require 'rubygems'
|
23
|
+
require 'csv'
|
24
|
+
require 'set'
|
25
|
+
require 'bio'
|
26
|
+
require 'trollop'
|
27
|
+
|
22
28
|
#parse command line arguments
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
29
|
+
opts = Trollop::options do
|
30
|
+
opt :fasta, "The file containing the sequences in FASTA format", :type => String
|
31
|
+
opt :input_clean, "The name of the clean sequence table in CSV format", :type => String, :short => "-c"
|
32
|
+
opt :input_contaminated, "The name of the contaminated sequence table in CSV format", :type => String, :short => "-d"
|
33
|
+
|
34
|
+
opt :output_clean, "The name of the FASTA file where clean sequences will be written to", :type => String, :short => "-o"
|
35
|
+
opt :output_contaminated, "The name of the FASTA file where contaminated sequences will be written to", :type => String, :short => "-p"
|
36
|
+
end
|
37
|
+
|
38
|
+
unless opts[:fasta_given] && opts[:input_clean_given] && opts[:input_contaminated_given] && opts[:output_clean_given] && opts[:output_contaminated_given]
|
39
|
+
puts "Invalid arguments, see --help for more information."
|
40
|
+
abort
|
27
41
|
end
|
28
42
|
|
29
43
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
30
44
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
31
45
|
|
32
|
-
require 'rubygems'
|
33
|
-
require 'csv'
|
34
|
-
require 'set'
|
35
|
-
require 'bio'
|
36
|
-
|
37
46
|
rootpath = File.dirname(File.dirname(__FILE__))
|
38
47
|
PHYTA_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
39
48
|
puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"
|
40
49
|
|
41
|
-
|
42
|
-
|
43
|
-
settings[:input_contaminated] = ARGV.shift
|
44
|
-
settings[:output_clean] = ARGV.shift
|
45
|
-
settings[:output_contaminated] = ARGV.shift
|
46
|
-
|
47
|
-
unless File.exists?(settings[:input_fasta])
|
48
|
-
puts "The input file at " + File.expand_path(settings[:input_fasta]) + " could not be opened!"
|
49
|
-
exit
|
50
|
+
unless File.exists?(opts[:fasta])
|
51
|
+
abort "The input file at " + File.expand_path(opts[:fasta]) + " could not be opened!"
|
50
52
|
end
|
51
53
|
|
52
|
-
unless File.exists?(
|
53
|
-
|
54
|
-
exit
|
54
|
+
unless File.exists?(opts[:input_clean])
|
55
|
+
abort "The input file at " + File.expand_path(opts[:input_clean]) + " could not be opened!"
|
55
56
|
end
|
56
57
|
|
57
|
-
unless File.exists?(
|
58
|
-
|
59
|
-
exit
|
58
|
+
unless File.exists?(opts[:input_contaminated])
|
59
|
+
abort "The input file at " + File.expand_path(opts[:input_contaminated]) + " could not be opened!"
|
60
60
|
end
|
61
61
|
|
62
|
-
if File.exists?(
|
63
|
-
|
64
|
-
exit
|
62
|
+
if File.exists?(opts[:output_clean])
|
63
|
+
abort "The input file at " + File.expand_path(opts[:output_clean]) + " already exists!"
|
65
64
|
end
|
66
65
|
|
67
|
-
if File.exists?(
|
68
|
-
|
69
|
-
exit
|
66
|
+
if File.exists?(opts[:output_contaminated])
|
67
|
+
abort "The input file at " + File.expand_path(opts[:output_contaminated]) + " already exists!"
|
70
68
|
end
|
71
69
|
|
72
70
|
#CSV backwards compatibility
|
@@ -79,23 +77,23 @@ end
|
|
79
77
|
|
80
78
|
#Open output of Kingdom-Splitter, save clean and contaminated sequence ids in two sets
|
81
79
|
puts "Reading clean..."
|
82
|
-
clean_table = INSTALLED_CSV.open(
|
80
|
+
clean_table = INSTALLED_CSV.open(opts[:input_clean], "r", { :col_sep => ";", :headers => :first_row, :header_converters => :symbol})
|
83
81
|
clean = table_to_set(clean_table, :query_sequence_id)
|
84
82
|
clean_table.close
|
85
83
|
|
86
84
|
puts "Reading contaminated..."
|
87
|
-
contaminated_table = INSTALLED_CSV.open(
|
85
|
+
contaminated_table = INSTALLED_CSV.open(opts[:input_contaminated], "r", { :col_sep => ";", :headers => :first_row, :header_converters => :symbol})
|
88
86
|
contaminated = table_to_set(contaminated_table, :query_sequence_id)
|
89
87
|
contaminated_table.close
|
90
88
|
|
91
89
|
#Initialize output files
|
92
|
-
clean_out = File.open(
|
93
|
-
contaminated_out = File.open(
|
90
|
+
clean_out = File.open(opts[:output_clean], "w")
|
91
|
+
contaminated_out = File.open(opts[:output_contaminated], "w")
|
94
92
|
|
95
93
|
puts "Extracting FASTA sequences..."
|
96
94
|
QUERY_SEQ_REGEXP = /\A(\S+)\s.*\z/ #Make sure this is exactly the same as in BlastStringParser in Kingdom-Assignment
|
97
95
|
|
98
|
-
sequences = Bio::FastaFormat.open(
|
96
|
+
sequences = Bio::FastaFormat.open(opts[:fasta])
|
99
97
|
sequences.each do |entry|
|
100
98
|
current = QUERY_SEQ_REGEXP.match(entry.definition)[1] #TODO do something when this comparison fails
|
101
99
|
if clean.include?(current)
|
data/bin/phyta-split
CHANGED
@@ -1,13 +1,30 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
+
require 'trollop'
|
4
5
|
require 'csv' #Will use FasterCSV on Ruby 1.8
|
6
|
+
require 'yaml'
|
5
7
|
|
6
8
|
SCRIPT_NAME = "phyta-split"
|
7
9
|
|
8
|
-
#
|
10
|
+
#parse command line arguments
|
11
|
+
opts = Trollop::options do
|
12
|
+
opt :input_file, "The output of phyta-assign in CSV format", :type => String
|
13
|
+
opt :output_clean, "The name of the clean output table in CSV format", :type => String, :default => "[name_of_input_file]_clean.csv", :short => "-c"
|
14
|
+
opt :output_contaminated, "The name of the contaminated output table in CSV format", :type => String, :default => "[name_of_input_file]_contaminated.csv", :short => "-d"
|
15
|
+
opt :filter, "Optional: A file in YAML format containing a list of taxa to be considered contaminants", :type => String, :default => "Use builtin filter capturing Bacteria, Archaea, Viruses and NONE. To learn how to write your own filters, visit https://github.com/PalMuc/bio-phyta/wiki/Custom-filters ", :short => "-f"
|
16
|
+
end
|
17
|
+
|
18
|
+
unless opts[:input_file_given]
|
19
|
+
puts "Invalid arguments, see --help for more information."
|
20
|
+
abort
|
21
|
+
end
|
22
|
+
|
23
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
9
24
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
10
25
|
|
26
|
+
require 'kingdom_db'
|
27
|
+
|
11
28
|
#CSV backwards compatibility
|
12
29
|
if CSV.const_defined? :Reader
|
13
30
|
require 'fastercsv'
|
@@ -20,46 +37,57 @@ rootpath = File.dirname(File.dirname(__FILE__))
|
|
20
37
|
PHYTA_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
21
38
|
puts "Running #{SCRIPT_NAME} #{PHYTA_VERSION}"
|
22
39
|
|
23
|
-
unless ARGV.size == 1
|
24
|
-
puts "Usage: #{SCRIPT_NAME} input.csv"
|
25
|
-
puts "This will automatically create input_clean.csv and input_contaminated.csv in the same directory."
|
26
|
-
exit
|
27
|
-
end
|
28
|
-
|
29
|
-
#Command line arguments
|
30
|
-
settings = {}
|
31
|
-
settings[:input_file] = ARGV.shift
|
32
|
-
|
33
40
|
#Set up output file
|
34
|
-
fullpath = File.expand_path(
|
41
|
+
fullpath = File.expand_path(opts[:input_file])
|
35
42
|
suffix = File.extname(fullpath)
|
36
43
|
dirname = File.dirname(fullpath)
|
37
44
|
name = File.basename(fullpath, suffix)
|
38
45
|
|
39
|
-
|
40
|
-
|
46
|
+
unless opts[:output_clean_given]
|
47
|
+
opts[:output_clean] = dirname + "/" + name + "_clean.csv"
|
48
|
+
end
|
49
|
+
|
50
|
+
unless opts[:output_contaminated_given]
|
51
|
+
opts[:output_contaminated] = dirname + "/" + name + "_contaminated.csv"
|
52
|
+
end
|
53
|
+
|
54
|
+
filter_array = nil
|
55
|
+
|
56
|
+
if opts[:filter_given]
|
57
|
+
begin
|
58
|
+
filter_array = YAML::load(File.open(opts[:filter], 'r'))
|
59
|
+
rescue Exception => e
|
60
|
+
puts "Error: #{e.message}"
|
61
|
+
puts e.backtrace.join("\n")
|
62
|
+
puts "Please see https://github.com/PalMuc/bio-phyta/wiki/Custom-filters for instructions on how to write filters"
|
63
|
+
abort
|
64
|
+
end
|
65
|
+
|
66
|
+
unless filter_array.is_a? Array
|
67
|
+
puts "Error: Invalid filter format.\nPlease see https://github.com/PalMuc/bio-phyta/wiki/Custom-filters for instructions on how to write filters"
|
68
|
+
abort
|
69
|
+
end
|
70
|
+
else
|
71
|
+
filter_array = KingdomDB::DEFAULT_FILTER
|
72
|
+
end
|
41
73
|
|
42
74
|
csv_header = ["query sequence id", "hit accession number", "sgi", "evalue", "species", "subject annotation", "subject score", "kingdom"]
|
43
75
|
|
44
76
|
#Open input file
|
45
|
-
if !File.file?(
|
46
|
-
puts "No input file at " + File.expand_path(
|
77
|
+
if !File.file?(opts[:input_file])
|
78
|
+
puts "No input file at " + File.expand_path(opts[:input_file]) + "!"
|
47
79
|
exit
|
48
80
|
end
|
49
|
-
input = INSTALLED_CSV.open(
|
81
|
+
input = INSTALLED_CSV.open(opts[:input_file], "r", {
|
50
82
|
:col_sep => ";",
|
51
83
|
:headers => :first_row,
|
52
84
|
:header_converters => :symbol})
|
53
85
|
|
54
86
|
clean_seqs = {}
|
55
87
|
contaminated_seqs = {}
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
"Viruses",
|
60
|
-
"NONE"
|
61
|
-
#TODO is this all?
|
62
|
-
]
|
88
|
+
#TODO make sure filters are consistent
|
89
|
+
|
90
|
+
|
63
91
|
|
64
92
|
warning = false;
|
65
93
|
|
@@ -80,7 +108,7 @@ input.each do |current_row|
|
|
80
108
|
|
81
109
|
seq_is_in_clean = clean_seqs.has_key?(seqid)
|
82
110
|
seq_is_in_contaminated = contaminated_seqs.has_key?(seqid)
|
83
|
-
kingdom_is_in_contaminated =
|
111
|
+
kingdom_is_in_contaminated = filter_array.include?(kingdom)
|
84
112
|
|
85
113
|
if seq_is_in_clean && seq_is_in_contaminated
|
86
114
|
|
@@ -114,7 +142,7 @@ input.each do |current_row|
|
|
114
142
|
end
|
115
143
|
else
|
116
144
|
#One hit is not contaminated, move to clean seqs
|
117
|
-
if contaminated_seqs[seqid][:evalue].to_f
|
145
|
+
if contaminated_seqs[seqid][:evalue].to_f > current_row[:evalue].to_f
|
118
146
|
clean_seqs[seqid] = current_row
|
119
147
|
else
|
120
148
|
clean_seqs[seqid] = contaminated_seqs[seqid]
|
@@ -140,12 +168,12 @@ unless (clean_seqs.keys & contaminated_seqs.keys).empty?
|
|
140
168
|
end
|
141
169
|
|
142
170
|
#Output
|
143
|
-
contaminated = INSTALLED_CSV.open(
|
171
|
+
contaminated = INSTALLED_CSV.open(opts[:output_contaminated], "w", {
|
144
172
|
:col_sep => ";",
|
145
173
|
:headers => csv_header,
|
146
174
|
:write_headers => true})
|
147
175
|
|
148
|
-
clean = INSTALLED_CSV.open(
|
176
|
+
clean = INSTALLED_CSV.open(opts[:output_clean], "w", {
|
149
177
|
:col_sep => ";",
|
150
178
|
:headers => csv_header,
|
151
179
|
:write_headers => true})
|
data/lib/kingdom_db.rb
CHANGED
@@ -4,6 +4,12 @@ class KingdomDB
|
|
4
4
|
|
5
5
|
ROOT_ID = "1"
|
6
6
|
SCIENTIFIC_NAME = "scientific name"
|
7
|
+
DEFAULT_FILTER = [
|
8
|
+
"Bacteria",
|
9
|
+
"Archaea",
|
10
|
+
"Viruses",
|
11
|
+
"NONE"
|
12
|
+
]
|
7
13
|
|
8
14
|
def initialize(server, user, password, database)
|
9
15
|
|
@@ -26,7 +32,7 @@ class KingdomDB
|
|
26
32
|
@filter_hit_cache = {}
|
27
33
|
|
28
34
|
end
|
29
|
-
|
35
|
+
|
30
36
|
def id_from_name(taxon_name)
|
31
37
|
db_results = @database[:names].select(:taxonid, :class).filter(:name => taxon_name).all
|
32
38
|
|
data/test/test_blackbox_assign.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
class BlackBoxTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
ASSIGN_DATADIR = "test/data/assign"
|
7
|
+
|
8
|
+
def test_without_parameters
|
9
|
+
#This test does not make a whole lot of sense...
|
10
|
+
result = %x[bin/phyta-assign]
|
11
|
+
expected = "Invalid arguments, see --help for more information."
|
12
|
+
assert_equal expected.strip, result.strip
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_default_filter
|
16
|
+
Dir.mktmpdir do |dir|
|
17
|
+
%x[bin/phyta-assign -i #{ASSIGN_DATADIR}/in_medium.xml -o #{dir}/out_default_filter.csv]
|
18
|
+
result = File.open("#{dir}/out_default_filter.csv").read
|
19
|
+
target = File.open("#{ASSIGN_DATADIR}/target_default_filter.csv").read
|
20
|
+
|
21
|
+
assert_not_nil result
|
22
|
+
assert_not_nil target
|
23
|
+
|
24
|
+
assert_block "Output of out_medium.xml invalid." do
|
25
|
+
result == target
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_invalid_filter
|
31
|
+
Dir.mktmpdir do |dir|
|
32
|
+
response = %x[bin/phyta-assign -i #{ASSIGN_DATADIR}/in_medium.xml -o #{dir}/out_default_filter.csv -f #{ASSIGN_DATADIR}/in_medium.xml]
|
33
|
+
assert response.include? "Error"
|
34
|
+
assert !File.exist?("#{dir}/out_default_filter.csv")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_small
|
39
|
+
Dir.mktmpdir do |dir|
|
40
|
+
res = %x[bin/phyta-assign -i #{ASSIGN_DATADIR}/in_3.xml -o #{dir}/out_3.csv -f #{SPLIT_DATADIR}/../common/default_filter.yaml]
|
41
|
+
|
42
|
+
result = File.open("#{dir}/out_3.csv").read
|
43
|
+
target = File.open("#{ASSIGN_DATADIR}/target_3.csv").read
|
44
|
+
|
45
|
+
assert_not_nil result
|
46
|
+
assert_not_nil target
|
47
|
+
|
48
|
+
assert_equal target, result, "Output of out_3.xml invalid"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_medium
|
53
|
+
Dir.mktmpdir do |dir|
|
54
|
+
%x[bin/phyta-assign -i #{ASSIGN_DATADIR}/in_medium.xml -o #{dir}/out_medium.csv -f #{SPLIT_DATADIR}/../common/default_filter.yaml]
|
55
|
+
result = File.open("#{dir}/out_medium.csv").read
|
56
|
+
target = File.open("#{ASSIGN_DATADIR}/target_medium.csv").read
|
57
|
+
|
58
|
+
assert_not_nil result
|
59
|
+
assert_not_nil target
|
60
|
+
|
61
|
+
assert_block "Output of out_medium.xml invalid." do
|
62
|
+
result == target
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
data/test/test_blackbox_extract.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
class BlackBoxTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
EXTRACT_DATADIR = "test/data/extract"
|
7
|
+
EXTRACT_BINARY = "bin/phyta-extract"
|
8
|
+
|
9
|
+
context "Extract command line output" do
|
10
|
+
should "print default message if run without parameters" do
|
11
|
+
result = %x[#{EXTRACT_BINARY}]
|
12
|
+
expected = "Invalid arguments, see --help for more information."
|
13
|
+
assert_equal expected.strip, result.strip
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
context "Extracting" do
|
18
|
+
should "work if the clean file is empty" do
|
19
|
+
Dir.mktmpdir do |dir|
|
20
|
+
result = %x[#{EXTRACT_BINARY} -c #{EXTRACT_DATADIR}/clean_empty_clean.csv -d #{EXTRACT_DATADIR}/clean_empty_contaminated.csv -f #{EXTRACT_DATADIR}/truncated.fasta -o #{dir}/clean_empty_clean_out.fasta -p #{dir}/clean_empty_contaminated_out.fasta]
|
21
|
+
|
22
|
+
clean_result = File.open("#{dir}/clean_empty_clean_out.fasta").read
|
23
|
+
contaminated_result = File.open("#{dir}/clean_empty_contaminated_out.fasta").read
|
24
|
+
|
25
|
+
clean_target = File.open("#{EXTRACT_DATADIR}/clean_empty_clean_target.fasta").read
|
26
|
+
contaminated_target = File.open("#{EXTRACT_DATADIR}/clean_empty_contaminated_target.fasta").read
|
27
|
+
|
28
|
+
assert_not_nil clean_result
|
29
|
+
assert_not_nil contaminated_result
|
30
|
+
assert_not_nil clean_target
|
31
|
+
assert_not_nil contaminated_target
|
32
|
+
|
33
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
34
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
should "work if the contaminated file is empty" do
|
38
|
+
Dir.mktmpdir do |dir|
|
39
|
+
result = %x[#{EXTRACT_BINARY} -c #{EXTRACT_DATADIR}/contaminated_empty_clean.csv -d #{EXTRACT_DATADIR}/contaminated_empty_contaminated.csv -f #{EXTRACT_DATADIR}/truncated.fasta -o #{dir}/contaminated_empty_clean_out.fasta -p #{dir}/contaminated_empty_contaminated_out.fasta]
|
40
|
+
|
41
|
+
clean_result = File.open("#{dir}/contaminated_empty_clean_out.fasta").read
|
42
|
+
contaminated_result = File.open("#{dir}/contaminated_empty_contaminated_out.fasta").read
|
43
|
+
|
44
|
+
clean_target = File.open("#{EXTRACT_DATADIR}/contaminated_empty_clean_target.fasta").read
|
45
|
+
contaminated_target = File.open("#{EXTRACT_DATADIR}/contaminated_empty_contaminated_target.fasta").read
|
46
|
+
|
47
|
+
assert_not_nil clean_result
|
48
|
+
assert_not_nil contaminated_result
|
49
|
+
assert_not_nil clean_target
|
50
|
+
assert_not_nil contaminated_target
|
51
|
+
|
52
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
53
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
data/test/test_blackbox_split.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
class BlackBoxTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
SPLIT_DATADIR = "test/data/split"
|
7
|
+
|
8
|
+
context "Command line output" do
|
9
|
+
should "print default message if run without parameters" do
|
10
|
+
result = %x[bin/phyta-split]
|
11
|
+
expected = "Invalid arguments, see --help for more information."
|
12
|
+
assert_equal expected.strip, result.strip
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
context "Filter parsing" do
|
17
|
+
should "print an error if the filter file is invalid" do
|
18
|
+
Dir.mktmpdir do |dir|
|
19
|
+
response = %x[bin/phyta-split -i #{SPLIT_DATADIR}/in_okay.csv -c #{dir}/clean_okay.csv -d #{dir}/contaminated_okay.csv -f #{SPLIT_DATADIR}/in_okay.csv]
|
20
|
+
assert response.include? "Error"
|
21
|
+
assert !File.exist?("#{dir}/clean_okay.csv")
|
22
|
+
assert !File.exist?("#{dir}/contaminated_okay.csv")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
context "PhyTA Split" do
|
29
|
+
should "put a sequence into clean if one hit is not in the filter" do
|
30
|
+
|
31
|
+
Dir.mktmpdir do |dir|
|
32
|
+
%x[bin/phyta-split -i #{SPLIT_DATADIR}/in_okay.csv -c #{dir}/clean_okay.csv -d #{dir}/contaminated_okay.csv -f #{SPLIT_DATADIR}/../common/default_filter.yaml]
|
33
|
+
clean_result = File.open("#{dir}/clean_okay.csv").read
|
34
|
+
contaminated_result = File.open("#{dir}/contaminated_okay.csv").read
|
35
|
+
|
36
|
+
clean_target = File.open("#{SPLIT_DATADIR}/clean_okay_target.csv").read
|
37
|
+
contaminated_target = File.open("#{SPLIT_DATADIR}/contaminated_okay_target.csv").read
|
38
|
+
|
39
|
+
assert_not_nil clean_result
|
40
|
+
assert_not_nil contaminated_result
|
41
|
+
assert_not_nil clean_target
|
42
|
+
assert_not_nil contaminated_target
|
43
|
+
|
44
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
45
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
should "put a sequence into contaminated if all hits are captured by the filter" do
|
51
|
+
|
52
|
+
Dir.mktmpdir do |dir|
|
53
|
+
%x[bin/phyta-split -i #{SPLIT_DATADIR}/in_other.csv -c #{dir}/clean_other.csv -d #{dir}/contaminated_other.csv -f #{SPLIT_DATADIR}/../common/default_filter.yaml]
|
54
|
+
clean_result = File.open("#{dir}/clean_other.csv").read
|
55
|
+
contaminated_result = File.open("#{dir}/contaminated_other.csv").read
|
56
|
+
|
57
|
+
clean_target = File.open("#{SPLIT_DATADIR}/clean_other_target.csv").read
|
58
|
+
contaminated_target = File.open("#{SPLIT_DATADIR}/contaminated_other_target.csv").read
|
59
|
+
|
60
|
+
assert_not_nil clean_result
|
61
|
+
assert_not_nil contaminated_result
|
62
|
+
assert_not_nil clean_target
|
63
|
+
assert_not_nil contaminated_target
|
64
|
+
|
65
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
66
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
should "always choose the best hit, even if it is in the filtered set" do
|
74
|
+
|
75
|
+
Dir.mktmpdir do |dir|
|
76
|
+
%x[bin/phyta-split -i #{SPLIT_DATADIR}/in_3.csv -c #{dir}/clean_3.csv -d #{dir}/contaminated_3.csv -f #{SPLIT_DATADIR}/../common/default_filter.yaml]
|
77
|
+
clean_result = File.open("#{dir}/clean_3.csv").read
|
78
|
+
contaminated_result = File.open("#{dir}/contaminated_3.csv").read
|
79
|
+
|
80
|
+
clean_target = File.open("#{SPLIT_DATADIR}/out_3_target_clean.csv").read
|
81
|
+
contaminated_target = File.open("#{SPLIT_DATADIR}/out_3_target_contaminated.csv").read
|
82
|
+
|
83
|
+
assert_not_nil clean_result
|
84
|
+
assert_not_nil contaminated_result
|
85
|
+
assert_not_nil clean_target
|
86
|
+
assert_not_nil contaminated_target
|
87
|
+
|
88
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
89
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
should "split with the default filter if none specified" do
|
95
|
+
|
96
|
+
Dir.mktmpdir do |dir|
|
97
|
+
%x[bin/phyta-split -i #{SPLIT_DATADIR}/in_3.csv -c #{dir}/clean_3.csv -d #{dir}/contaminated_3.csv]
|
98
|
+
clean_result = File.open("#{dir}/clean_3.csv").read
|
99
|
+
contaminated_result = File.open("#{dir}/contaminated_3.csv").read
|
100
|
+
|
101
|
+
clean_target = File.open("#{SPLIT_DATADIR}/out_3_default_filter_target_clean.csv").read
|
102
|
+
contaminated_target = File.open("#{SPLIT_DATADIR}/out_3_default_filter_target_contaminated.csv").read
|
103
|
+
|
104
|
+
assert_not_nil clean_result
|
105
|
+
assert_not_nil contaminated_result
|
106
|
+
assert_not_nil clean_target
|
107
|
+
assert_not_nil contaminated_target
|
108
|
+
|
109
|
+
assert_equal clean_target, clean_result, "Clean files differ"
|
110
|
+
assert_equal contaminated_target, contaminated_result, "Contaminated files differ"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
metadata
CHANGED
@@ -1,191 +1,138 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-phyta
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.1
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 9
|
9
|
-
- 0
|
10
|
-
version: 0.9.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Philipp Comans
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
requirement: &
|
12
|
+
date: 2011-10-21 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio
|
16
|
+
requirement: &2153022740 !ruby/object:Gem::Requirement
|
22
17
|
none: false
|
23
|
-
requirements:
|
24
|
-
- -
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
hash: 3
|
27
|
-
segments:
|
28
|
-
- 1
|
29
|
-
- 4
|
30
|
-
- 2
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
31
21
|
version: 1.4.2
|
32
|
-
version_requirements: *id001
|
33
|
-
name: bio
|
34
|
-
prerelease: false
|
35
22
|
type: :runtime
|
36
|
-
|
37
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2153022740
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mysql
|
27
|
+
requirement: &2153022260 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
hash: 45
|
43
|
-
segments:
|
44
|
-
- 2
|
45
|
-
- 8
|
46
|
-
- 1
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
47
32
|
version: 2.8.1
|
48
|
-
version_requirements: *id002
|
49
|
-
name: mysql
|
50
|
-
prerelease: false
|
51
33
|
type: :runtime
|
52
|
-
|
53
|
-
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2153022260
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: sequel
|
38
|
+
requirement: &2153021780 !ruby/object:Gem::Requirement
|
54
39
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
hash: 119
|
59
|
-
segments:
|
60
|
-
- 3
|
61
|
-
- 28
|
62
|
-
- 0
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
63
43
|
version: 3.28.0
|
64
|
-
version_requirements: *id003
|
65
|
-
name: sequel
|
66
|
-
prerelease: false
|
67
44
|
type: :runtime
|
68
|
-
|
69
|
-
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *2153021780
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: fastercsv
|
49
|
+
requirement: &2153021300 !ruby/object:Gem::Requirement
|
70
50
|
none: false
|
71
|
-
requirements:
|
72
|
-
- -
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
hash: 11
|
75
|
-
segments:
|
76
|
-
- 1
|
77
|
-
- 5
|
78
|
-
- 4
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
79
54
|
version: 1.5.4
|
80
|
-
version_requirements: *id004
|
81
|
-
name: fastercsv
|
82
|
-
prerelease: false
|
83
55
|
type: :runtime
|
84
|
-
|
85
|
-
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2153021300
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: nokogiri
|
60
|
+
requirement: &2153020820 !ruby/object:Gem::Requirement
|
86
61
|
none: false
|
87
|
-
requirements:
|
88
|
-
- -
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
hash: 3
|
91
|
-
segments:
|
92
|
-
- 1
|
93
|
-
- 5
|
94
|
-
- 0
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
95
65
|
version: 1.5.0
|
96
|
-
version_requirements: *id005
|
97
|
-
name: nokogiri
|
98
|
-
prerelease: false
|
99
66
|
type: :runtime
|
100
|
-
|
101
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *2153020820
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: trollop
|
71
|
+
requirement: &2153020340 !ruby/object:Gem::Requirement
|
102
72
|
none: false
|
103
|
-
requirements:
|
104
|
-
- -
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
hash: 83
|
107
|
-
segments:
|
108
|
-
- 1
|
109
|
-
- 16
|
110
|
-
- 2
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
111
76
|
version: 1.16.2
|
112
|
-
version_requirements: *id006
|
113
|
-
name: trollop
|
114
|
-
prerelease: false
|
115
77
|
type: :runtime
|
116
|
-
- !ruby/object:Gem::Dependency
|
117
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
118
|
-
none: false
|
119
|
-
requirements:
|
120
|
-
- - ">="
|
121
|
-
- !ruby/object:Gem::Version
|
122
|
-
hash: 3
|
123
|
-
segments:
|
124
|
-
- 0
|
125
|
-
version: "0"
|
126
|
-
version_requirements: *id007
|
127
|
-
name: shoulda
|
128
78
|
prerelease: false
|
79
|
+
version_requirements: *2153020340
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: shoulda
|
82
|
+
requirement: &2153019860 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
129
88
|
type: :development
|
130
|
-
|
131
|
-
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *2153019860
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: bundler
|
93
|
+
requirement: &2153019380 !ruby/object:Gem::Requirement
|
132
94
|
none: false
|
133
|
-
requirements:
|
95
|
+
requirements:
|
134
96
|
- - ~>
|
135
|
-
- !ruby/object:Gem::Version
|
136
|
-
hash: 23
|
137
|
-
segments:
|
138
|
-
- 1
|
139
|
-
- 0
|
140
|
-
- 0
|
97
|
+
- !ruby/object:Gem::Version
|
141
98
|
version: 1.0.0
|
142
|
-
version_requirements: *id008
|
143
|
-
name: bundler
|
144
|
-
prerelease: false
|
145
99
|
type: :development
|
146
|
-
|
147
|
-
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *2153019380
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: jeweler
|
104
|
+
requirement: &2153018900 !ruby/object:Gem::Requirement
|
148
105
|
none: false
|
149
|
-
requirements:
|
106
|
+
requirements:
|
150
107
|
- - ~>
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
hash: 7
|
153
|
-
segments:
|
154
|
-
- 1
|
155
|
-
- 6
|
156
|
-
- 4
|
108
|
+
- !ruby/object:Gem::Version
|
157
109
|
version: 1.6.4
|
158
|
-
version_requirements: *id009
|
159
|
-
name: jeweler
|
160
|
-
prerelease: false
|
161
110
|
type: :development
|
162
|
-
- !ruby/object:Gem::Dependency
|
163
|
-
requirement: &id010 !ruby/object:Gem::Requirement
|
164
|
-
none: false
|
165
|
-
requirements:
|
166
|
-
- - ">="
|
167
|
-
- !ruby/object:Gem::Version
|
168
|
-
hash: 3
|
169
|
-
segments:
|
170
|
-
- 0
|
171
|
-
version: "0"
|
172
|
-
version_requirements: *id010
|
173
|
-
name: rcov
|
174
111
|
prerelease: false
|
112
|
+
version_requirements: *2153018900
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: rcov
|
115
|
+
requirement: &2153018420 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
175
121
|
type: :development
|
176
|
-
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *2153018420
|
124
|
+
description: Pipeline to remove contaminations from EST libraries
|
177
125
|
email: philipp.comans@googlemail.com
|
178
|
-
executables:
|
179
|
-
- phyta-split
|
126
|
+
executables:
|
180
127
|
- phyta-assign
|
181
128
|
- phyta-extract
|
182
129
|
- phyta-setup-taxonomy-db
|
130
|
+
- phyta-split
|
183
131
|
extensions: []
|
184
|
-
|
185
|
-
extra_rdoc_files:
|
132
|
+
extra_rdoc_files:
|
186
133
|
- LICENSE.txt
|
187
134
|
- README.rdoc
|
188
|
-
files:
|
135
|
+
files:
|
189
136
|
- .document
|
190
137
|
- Gemfile
|
191
138
|
- LICENSE.txt
|
@@ -199,41 +146,37 @@ files:
|
|
199
146
|
- lib/blast_string_parser.rb
|
200
147
|
- lib/kingdom_db.rb
|
201
148
|
- test/helper.rb
|
202
|
-
- test/
|
149
|
+
- test/test_blackbox_assign.rb
|
150
|
+
- test/test_blackbox_extract.rb
|
151
|
+
- test/test_blackbox_split.rb
|
203
152
|
- test/test_blast_string_parser.rb
|
204
153
|
- test/test_kingdom_db.rb
|
205
|
-
homepage:
|
206
|
-
licenses:
|
154
|
+
homepage: https://github.com/PalMuc/bio-phyta
|
155
|
+
licenses:
|
207
156
|
- LGPL
|
208
157
|
post_install_message:
|
209
158
|
rdoc_options: []
|
210
|
-
|
211
|
-
require_paths:
|
159
|
+
require_paths:
|
212
160
|
- lib
|
213
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
161
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
214
162
|
none: false
|
215
|
-
requirements:
|
216
|
-
- -
|
217
|
-
- !ruby/object:Gem::Version
|
218
|
-
|
219
|
-
segments:
|
163
|
+
requirements:
|
164
|
+
- - ! '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
segments:
|
220
168
|
- 0
|
221
|
-
|
222
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
169
|
+
hash: -3130547697683155421
|
170
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
223
171
|
none: false
|
224
|
-
requirements:
|
225
|
-
- -
|
226
|
-
- !ruby/object:Gem::Version
|
227
|
-
|
228
|
-
segments:
|
229
|
-
- 0
|
230
|
-
version: "0"
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
231
176
|
requirements: []
|
232
|
-
|
233
177
|
rubyforge_project:
|
234
178
|
rubygems_version: 1.8.10
|
235
179
|
signing_key:
|
236
180
|
specification_version: 3
|
237
181
|
summary: Pipeline to remove contaminations from EST libraries
|
238
182
|
test_files: []
|
239
|
-
|
data/test/test_blackbox.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
require 'tmpdir'
|
3
|
-
|
4
|
-
class BlackBoxTest < Test::Unit::TestCase
|
5
|
-
def test_without_parameters
|
6
|
-
#This test does not make a whole lot of sense...
|
7
|
-
result = %x[bin/phyta-assign]
|
8
|
-
expected = "Invalid arguments, see --help for more information."
|
9
|
-
assert_equal expected.strip, result.strip
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_small
|
13
|
-
Dir.mktmpdir do |dir|
|
14
|
-
%x[bin/phyta-assign -i test/data/in_3.xml -o #{dir}/out_3.csv]
|
15
|
-
result = File.open("#{dir}/out_3.csv").read
|
16
|
-
target = File.open("test/data/target_3.csv").read
|
17
|
-
|
18
|
-
assert_not_nil result
|
19
|
-
assert_not_nil target
|
20
|
-
|
21
|
-
assert_equal target, result, "Output of out_3.xml invalid"
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_medium
|
26
|
-
Dir.mktmpdir do |dir|
|
27
|
-
%x[bin/phyta-assign -i test/data/in_medium.xml -o #{dir}/out_medium.csv]
|
28
|
-
result = File.open("#{dir}/out_medium.csv").read
|
29
|
-
target = File.open("test/data/target_medium.csv").read
|
30
|
-
|
31
|
-
assert_not_nil result
|
32
|
-
assert_not_nil target
|
33
|
-
|
34
|
-
assert_block "Output of out_medium.xml invalid." do
|
35
|
-
result == target
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
|