genevalidator 1.5.6 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/README.md +2 -5
- data/lib/genevalidator.rb +16 -17
- data/lib/genevalidator/pool.rb +64 -0
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +5 -5
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c5782d78579ddb972a737ce921d5d360ff66e21
|
4
|
+
data.tar.gz: 07ecd5655a16f0c28daef03d836e5a2c9fdc2766
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39b4e764b527e3d58fc81b70f3cacb402c63601e8e87ea3b44351442e99b098672625b23b8b9a4e48aa8fa0d2e4b57c5a8a9d67d53944f133e1084dfdf86cb70
|
7
|
+
data.tar.gz: 272badb88bf7c91cc57e37781c8dfddf90f7154f78865c414c7b3a436686f733001fe1a458c14c27008da439940762dd150b631244e042e25237fd5a5f4f84ce
|
data/.travis.yml
CHANGED
@@ -10,7 +10,6 @@ before_install:
|
|
10
10
|
- ruby -pi -e "gsub(/^PREFIX = \/usr\/local/, 'PREFIX = ~/mafft/')" ~/mafft-7.205-with-extensions/core/Makefile
|
11
11
|
- (cd ~/mafft-7.205-with-extensions/core/ && make clean && make && make install)
|
12
12
|
- export PATH=$PATH:~/mafft/bin
|
13
|
-
- sudo apt-get install -y libgsl0-dev
|
14
13
|
script: bundle exec rake test
|
15
14
|
addons:
|
16
15
|
code_climate:
|
data/README.md
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
# GeneValidator - Identify problems with predicted genes
|
2
|
-
|
3
|
-
[![
|
4
|
-
[![Gem Version](https://badge.fury.io/rb/GeneValidator.svg)](http://badge.fury.io/rb/GeneValidator)
|
2
|
+
[![Build Status](https://travis-ci.org/wurmlab/genevalidator.svg?branch=master)](https://travis-ci.org/wurmlab/genevalidator)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/genevalidator.svg)](http://badge.fury.io/rb/genevalidator)
|
5
4
|
[![Dependency Status](https://gemnasium.com/wurmlab/GeneValidator.svg)](https://gemnasium.com/wurmlab/GeneValidator)
|
6
5
|
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/wurmlab/GeneValidator/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/wurmlab/GeneValidator/?branch=master)
|
7
6
|
[![Test Coverage](https://codeclimate.com/github/wurmlab/GeneValidator/badges/coverage.svg)](https://codeclimate.com/github/wurmlab/GeneValidator)
|
8
|
-
[![total downloads](http://ruby-gem-downloads-badge.herokuapp.com/GeneValidator?type=total&color=brightgreen)](https://rubygems.org/gems/GeneValidator)
|
9
7
|
|
10
8
|
## Introduction
|
11
9
|
The goal of GeneValidator is to identify problems with gene predictions and provide useful information based on the similarities to genes in public databases. The results produced will provide evidence on how sequencing curation may be done and will be useful in improving or trying out new approaches for gene prediction tools. The main target users of this tool are biologists who wish to validate the data produced in their labs.
|
@@ -38,7 +36,6 @@ It is also possible to add your own custom validations to GeneValidator.
|
|
38
36
|
* NCBI BLAST+ (>= 2.2.30+)
|
39
37
|
* MAFFT installation (download [here](http://mafft.cbrc.jp/alignment/software/)).
|
40
38
|
* Mozilla FireFox - In order to dynamically produce graphs for some of the validations, GeneValidator relies on a dependency called 'd3'. Unfortunately, at this moment in time, d3 only works in Firefox (download [here](https://www.mozilla.org/en-GB/firefox/new/)).
|
41
|
-
* GSL libraries (GNU Scientific Library) (see link below for installation help)
|
42
39
|
|
43
40
|
Please see [here](https://gist.github.com/IsmailM/b783e8a06565197084e6) for more help with installing the prerequisites.
|
44
41
|
|
data/lib/genevalidator.rb
CHANGED
@@ -11,6 +11,7 @@ require 'genevalidator/validation_gene_merge'
|
|
11
11
|
require 'genevalidator/validation_duplication'
|
12
12
|
require 'genevalidator/validation_open_reading_frame'
|
13
13
|
require 'genevalidator/validation_alignment'
|
14
|
+
require 'genevalidator/pool'
|
14
15
|
require 'bio-blastxmlparser'
|
15
16
|
require 'open-uri'
|
16
17
|
require 'uri'
|
@@ -37,7 +38,6 @@ module GeneValidator
|
|
37
38
|
# array of indexes for the start offsets of each query in the fasta file
|
38
39
|
attr_reader :query_offset_lst
|
39
40
|
attr_reader :overall_evaluation
|
40
|
-
attr_reader :multithreading
|
41
41
|
|
42
42
|
# global variables
|
43
43
|
attr_reader :no_queries
|
@@ -64,9 +64,7 @@ module GeneValidator
|
|
64
64
|
# db: 'remote', raw_sequences: nil, num_threads: 1, fast: false}
|
65
65
|
# +start_idx+: number of the sequence from the file to start with
|
66
66
|
# +overall_evaluation+: boolean variable for printing overall evaluation
|
67
|
-
|
68
|
-
def initialize(opt, start_idx = 1, overall_evaluation = true,
|
69
|
-
multithreading = false)
|
67
|
+
def initialize(opt, start_idx = 1, overall_evaluation = true)
|
70
68
|
# Validate opts
|
71
69
|
@opt = GVArgValidation.validate_args(opt)
|
72
70
|
|
@@ -76,7 +74,6 @@ module GeneValidator
|
|
76
74
|
@idx = 0
|
77
75
|
@start_idx = start_idx
|
78
76
|
|
79
|
-
@multithreading = multithreading
|
80
77
|
@overall_evaluation = overall_evaluation
|
81
78
|
|
82
79
|
# start a worker thread
|
@@ -240,6 +237,8 @@ module GeneValidator
|
|
240
237
|
##
|
241
238
|
#
|
242
239
|
def run_validations(iterator)
|
240
|
+
p = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
|
241
|
+
|
243
242
|
while @idx + 1 < @query_offset_lst.length
|
244
243
|
prediction = get_info_on_each_query_sequence
|
245
244
|
@idx += 1
|
@@ -250,18 +249,18 @@ module GeneValidator
|
|
250
249
|
@idx -= 1
|
251
250
|
break
|
252
251
|
end
|
253
|
-
|
252
|
+
current_idx = @idx
|
254
253
|
# the first validation should be treated separately
|
255
|
-
if
|
256
|
-
validate(prediction, hits,
|
254
|
+
if current_idx == @start_idx || @opt[:num_threads] == 1
|
255
|
+
validate(prediction, hits, current_idx)
|
257
256
|
else
|
258
|
-
|
259
|
-
validate(prediction, hits,
|
257
|
+
p.schedule(prediction, hits, current_idx) do |prediction, hits, current_idx|
|
258
|
+
validate(prediction, hits, current_idx)
|
260
259
|
end
|
261
260
|
end
|
262
|
-
|
263
261
|
end
|
264
|
-
|
262
|
+
ensure
|
263
|
+
p.shutdown if @opt[:num_threads] > 1
|
265
264
|
end
|
266
265
|
|
267
266
|
def parse_next_iteration(iterator, prediction)
|
@@ -297,8 +296,8 @@ module GeneValidator
|
|
297
296
|
# +prediction+: Sequence object
|
298
297
|
# +hits+: Array of +Sequence+ objects
|
299
298
|
# +idx+: the index number of the query
|
300
|
-
def validate(prediction, hits,
|
301
|
-
query_output = do_validations(prediction, hits,
|
299
|
+
def validate(prediction, hits, current_idx)
|
300
|
+
query_output = do_validations(prediction, hits, current_idx)
|
302
301
|
query_output.generate_html
|
303
302
|
query_output.print_output_file_yaml
|
304
303
|
query_output.print_output_console
|
@@ -382,7 +381,7 @@ module GeneValidator
|
|
382
381
|
# +idx+: the index number of the query
|
383
382
|
# Output:
|
384
383
|
# +Output+ object
|
385
|
-
def do_validations(prediction, hits,
|
384
|
+
def do_validations(prediction, hits, current_idx)
|
386
385
|
begin
|
387
386
|
hits = remove_identical_hits(prediction, hits)
|
388
387
|
rescue Exception => error # NoPIdentError
|
@@ -390,12 +389,12 @@ module GeneValidator
|
|
390
389
|
|
391
390
|
query_output = Output.new(@mutex, @mutex_yaml, @mutex_html,
|
392
391
|
@filename, @html_path,
|
393
|
-
@yaml_path,
|
392
|
+
@yaml_path, current_idx, @start_idx)
|
394
393
|
query_output.prediction_len = prediction.length_protein
|
395
394
|
query_output.prediction_def = prediction.definition
|
396
395
|
query_output.nr_hits = hits.length
|
397
396
|
|
398
|
-
plot_path = File.join(@plot_dir, "#{@filename}_#{
|
397
|
+
plot_path = File.join(@plot_dir, "#{@filename}_#{current_idx}")
|
399
398
|
|
400
399
|
validations = []
|
401
400
|
validations.push LengthClusterValidation.new(@type, prediction, hits,
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# From http://burgestrand.se/code/ruby-thread-pool/
|
2
|
+
#
|
3
|
+
# Copyright © 2012, Kim Burgestrand kim@burgestrand.se
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the “Software”), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
|
23
|
+
require 'thread'
|
24
|
+
|
25
|
+
# A fixed-size pool of worker threads that run scheduled blocks.
#
# Jobs are pushed onto a shared queue with #schedule and are popped by
# whichever worker is free. Once all work has been scheduled, call #shutdown:
# it lets the queued jobs finish, stops every worker and waits for them.
class Pool
  # Starts the worker threads.
  # Params:
  # +size+: number of worker threads to start
  def initialize(size)
    @size = size
    @jobs = Queue.new
    # Errors raised by jobs; reported by #shutdown instead of killing workers.
    @errors = Queue.new
    @pool = Array.new(@size) do |i|
      Thread.new do
        Thread.current[:id] = i
        catch(:exit) do
          loop do
            job, args = @jobs.pop
            begin
              job.call(*args)
            rescue StandardError => error
              # A failing job must not take its worker down, otherwise the
              # jobs still waiting in the queue may never be executed.
              # (+throw :exit+ is not an exception and still reaches +catch+.)
              @errors << error
            end
          end
        end
      end
    end
  end

  # Queues a job for execution by the next free worker.
  # Params:
  # +args+: arguments handed to the block when the job runs
  # +block+: the work to perform
  def schedule(*args, &block)
    @jobs << [block, args]
  end

  # Stops the pool after all previously scheduled jobs have run.
  #
  # One exit job is queued per worker, behind the pending work, and every
  # worker is then joined.
  # Output:
  # +Array+ of the joined worker threads
  # Raises the first error recorded from a job, if any job failed.
  def shutdown
    @size.times do
      schedule { throw :exit }
    end
    joined = @pool.map(&:join)
    raise @errors.pop unless @errors.empty?
    joined
  end
end
|
53
|
+
|
54
|
+
# Demo, executed only when this file is run directly: queue 20 jobs on a
# 10-thread pool; each job sleeps for a few seconds and then reports which
# worker thread ran it. The pool is shut down when the interpreter exits.
if __FILE__ == $0
  demo_pool = Pool.new(10)

  0.upto(19) do |job_no|
    demo_pool.schedule do
      sleep(rand(4) + 2)
      puts "Job #{job_no} finished by thread #{Thread.current[:id]}"
    end
  end

  at_exit { demo_pool.shutdown }
end
|
@@ -32,7 +32,7 @@ module GeneValidator
|
|
32
32
|
tab_options = "qseqid sseqid sacc slen qstart qend sstart send length qframe pident evalue"
|
33
33
|
|
34
34
|
database = 'swissprot -remote'
|
35
|
-
threads = '
|
35
|
+
threads = '1'
|
36
36
|
|
37
37
|
# Unwanted Output Files
|
38
38
|
prot_xml_out = "#{prot_blast_xml_file}.out"
|
@@ -63,7 +63,7 @@ module GeneValidator
|
|
63
63
|
test: true
|
64
64
|
}
|
65
65
|
|
66
|
-
(GeneValidator::Validation.new(opts, 1, false
|
66
|
+
(GeneValidator::Validation.new(opts, 1, false)).run
|
67
67
|
$stdout.reopen original_stdout
|
68
68
|
$stdout.reopen(prot_tab_out, 'w')
|
69
69
|
|
@@ -81,7 +81,7 @@ module GeneValidator
|
|
81
81
|
test: true
|
82
82
|
}
|
83
83
|
|
84
|
-
(GeneValidator::Validation.new(opts1, 1, false
|
84
|
+
(GeneValidator::Validation.new(opts1, 1, false)).run
|
85
85
|
$stdout.reopen original_stdout
|
86
86
|
|
87
87
|
diff = FileUtils.compare_file(prot_xml_out, prot_tab_out)
|
@@ -115,7 +115,7 @@ module GeneValidator
|
|
115
115
|
test: true
|
116
116
|
}
|
117
117
|
|
118
|
-
(GeneValidator::Validation.new(opts, 1, false
|
118
|
+
(GeneValidator::Validation.new(opts, 1, false)).run
|
119
119
|
$stdout.reopen original_stdout
|
120
120
|
$stdout.reopen(mrna_tab_out, 'w')
|
121
121
|
|
@@ -133,7 +133,7 @@ module GeneValidator
|
|
133
133
|
test: true
|
134
134
|
}
|
135
135
|
|
136
|
-
(GeneValidator::Validation.new(opts1, 1, false
|
136
|
+
(GeneValidator::Validation.new(opts1, 1, false)).run
|
137
137
|
$stdout.reopen original_stdout
|
138
138
|
|
139
139
|
diff = FileUtils.compare_file(mrna_xml_out, mrna_tab_out)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: genevalidator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Monica Dragan
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2015-05-
|
14
|
+
date: 2015-05-28 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: bundler
|
@@ -209,6 +209,7 @@ files:
|
|
209
209
|
- lib/genevalidator/get_raw_sequences.rb
|
210
210
|
- lib/genevalidator/hsp.rb
|
211
211
|
- lib/genevalidator/output.rb
|
212
|
+
- lib/genevalidator/pool.rb
|
212
213
|
- lib/genevalidator/sequences.rb
|
213
214
|
- lib/genevalidator/tabular_parser.rb
|
214
215
|
- lib/genevalidator/validation_alignment.rb
|