npsearch 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
data/test/files/4_secretome.fa
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
>isotig00009_f2_3~~~ - S.P.=> Cleavage Site: 22:23 | D-value: 0.532
|
2
|
-
MLKCFSIIMGLILLLEIGGGCA~~~IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLSCCGVNSYEDWDNNIYFNCSGPANNPEALWCAFLLLYTGSSKRSSQHPVRLWSSFPRTTKYFPHKDLHHWLCGYVYNVD
|
3
|
-
>isotig00009_f3_7~~~ - S.P.=> Cleavage Site: 16:17 | D-value: 0.643
|
4
|
-
MKTGIIIFISTVVVLP~~~ITLKPCGVPFSCCIPDQASGVANTQCGYGVRSPEQQNTFHTKIYTTGCADMFTMWINRYLYYIAGIAGVIVLVELFGFCFAHSLINDIKRQKARWAHR
|
5
|
-
>isotig00016_f1_0~~~ - S.P.=> Cleavage Site: 23:24 | D-value: 0.592
|
6
|
-
MNAGQIFIALMAQLFNACLLVSS~~~NFDSDIADSTLGKRSTGFVDTFGKRFVDSFGKRVDEFDYDHNGNYAEQSEQSSYISPQLKRGQKGLRSGSFIDAFGKRSSFQEVDEKRFADSFGKRFADSFGKRSPVGFVDTLGKRFAVSFGKRNTVGFVDTLGKRFADSFGKRSQQGFVDAFGKRYQGVY
|
data/test/files/5_output.fa
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
>isotig00009_f2_3~~~ - S.P.=> Cleavage Site: 22:23 | D-value: 0.532
|
2
|
-
MLKCFSIIMGLILLLEIGGGCA~~~IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLSCCGVNSYEDWDNNIYFNCSGPANNPEALWCAFLLLYTGSSKRSSQHPVRLWSSFPRTTKYFPHKDLHHWLCGYVYNVD
|
3
|
-
>isotig00009_f3_7~~~ - S.P.=> Cleavage Site: 16:17 | D-value: 0.643
|
4
|
-
MKTGIIIFISTVVVLP~~~ITLKPCGVPFSCCIPDQASGVANTQCGYGVRSPEQQNTFHTKIYTTGCADMFTMWINRYLYYIAGIAGVIVLVELFGFCFAHSLINDIKRQKARWAHR
|
5
|
-
>isotig00016_f1_0~~~ - S.P.=> Cleavage Site: 23:24 | D-value: 0.592
|
6
|
-
MNAGQIFIALMAQLFNACLLVSS~~~NFDSDIADSTLGKRSTGFVDTFGKRFVDSFGKRVDEFDYDHNGNYAEQSEQSSYISPQLKRGQKGLRSGSFIDAFGKRSSFQEVDEKRFADSFGKRFADSFGKRSPVGFVDTLGKRFAVSFGKRNTVGFVDTLGKRFADSFGKRSQQGFVDAFGKRYQGVY
|
data/test/files/5_output.html
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<title>Results</title>
|
5
|
-
<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
|
6
|
-
<style>
|
7
|
-
.id {font-weight: bold;}
|
8
|
-
.signalp {color:#007AC0; font-weight: bold;}
|
9
|
-
.motif {color:#00B050; font-weight: bold;}
|
10
|
-
.glycine {color:#FFC000; font-weight: bold;}
|
11
|
-
.phenylalanine {color:#FF00EB; font-weight: bold;}
|
12
|
-
.gkr {color:#FF0000; font-weight: bold;}
|
13
|
-
.cysteine {color:#00B050;}
|
14
|
-
p {word-wrap: break-word; font-family:Courier New, Courier, Mono;}
|
15
|
-
</style>
|
16
|
-
</head>
|
17
|
-
<body>
|
18
|
-
<p>
|
19
|
-
<span class='id'>isotig00009_f2_3</span>
|
20
|
-
<span>- S.P.=> Cleavage Site: 22:23 | D-value: 0.532</span>
|
21
|
-
<br>
|
22
|
-
<span class='signalp'>MLKCFSIIMGLILLLEIGGGCA</span><span>IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLS<span class="cysteine">C</span><span class="cysteine">C</span>GVNSYEDWDNNIYFN<span class="cysteine">C</span>SGPANNPEALW<span class="cysteine">C</span>AFLLLYTGSS<span class="motif">KR</span>SSQ<span class="motif">HPVR</span>LWSSFPRTTKYFPHKDLHHWL<span class="cysteine">C</span>GYVYNVD</span>
|
23
|
-
</p>
|
24
|
-
<p>
|
25
|
-
<span class='id'>isotig00009_f3_7</span>
|
26
|
-
<span>- S.P.=> Cleavage Site: 16:17 | D-value: 0.643</span>
|
27
|
-
<br>
|
28
|
-
<span class='signalp'>MKTGIIIFISTVVVLP</span><span>ITLKP<span class="cysteine">C</span>GVPFS<span class="cysteine">C</span><span class="cysteine">C</span>IPDQASGVANTQ<span class="cysteine">C</span>GYGVRSPEQQNTFHTKIYTTG<span class="cysteine">C</span>ADMFTMWINRYLYYIAGIAGVIVLVELFGF<span class="cysteine">C</span>FAHSLINDI<span class="motif">KR</span>QKARWAHR</span>
|
29
|
-
</p>
|
30
|
-
<p>
|
31
|
-
<span class='id'>isotig00016_f1_0</span>
|
32
|
-
<span>- S.P.=> Cleavage Site: 23:24 | D-value: 0.592</span>
|
33
|
-
<br>
|
34
|
-
<span class='signalp'>MNAGQIFIALMAQLFNACLLVSS</span><span>NFDSDIADSTL<span class="gkr">GKR</span>STGFVDTF<span class="gkr">GKR</span>FVDSF<span class="gkr">GKR</span>VDEFDYDHNGNYAEQSEQSSYISPQL<span class="motif">KR</span>GQ<span class="motif">KGLR</span>SGSFIDAF<span class="gkr">GKR</span>SSFQEVDE<span class="motif">KR</span>FADSF<span class="gkr">GKR</span>FADSF<span class="gkr">GKR</span>SPVGFVDTL<span class="gkr">GKR</span>FAVSF<span class="gkr">GKR</span>NTVGFVDTL<span class="gkr">GKR</span>FADSF<span class="gkr">GKR</span>SQQGFVDAF<span class="gkr">GKR</span>YQGVY</span>
|
35
|
-
</p>
|
36
|
-
</body>
|
37
|
-
</html>
|
data/test/test_np_search.rb
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'coveralls'
|
3
|
-
Coveralls.wear!
|
4
|
-
|
5
|
-
require 'bio'
|
6
|
-
require 'test/unit'
|
7
|
-
|
8
|
-
require 'npsearch'
|
9
|
-
require 'npsearch/arg_validator'
|
10
|
-
|
11
|
-
class InputChanged
|
12
|
-
# Changed to include the full entry definition rather than just the id. This
|
13
|
-
# was necessary for when testing individual parts since this is changed in
|
14
|
-
# pipeline
|
15
|
-
def read(input_file)
|
16
|
-
input_read = {}
|
17
|
-
biofastafile = Bio::FlatFile.open(Bio::FastaFormat, input_file)
|
18
|
-
biofastafile.each_entry do |entry|
|
19
|
-
# entry.definition used instead of entry.id
|
20
|
-
input_read[entry.definition] = entry.seq
|
21
|
-
end
|
22
|
-
input_read
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
class UnitTests < Test::Unit::TestCase
|
27
|
-
def setup # read all expected files
|
28
|
-
@dir = 'test/files'
|
29
|
-
@translation_test = NpSearch::Translation.new
|
30
|
-
@analysis_test = NpSearch::Analysis.new
|
31
|
-
@test_input1 = InputChanged.new
|
32
|
-
@test_arg_vldr = NpSearch::ArgValidators.new(:is_verbose)
|
33
|
-
@test_vldr = NpSearch::Validators.new
|
34
|
-
@test_input_read = NpSearch::Input.read("#{@dir}/genetic.fa")
|
35
|
-
@expected_translation = NpSearch::Input.read("#{@dir}/1_protein.fa")
|
36
|
-
@expected_orf = NpSearch::Input.read("#{@dir}/2_orf.fa")
|
37
|
-
@expected_sp_out_file = File.read("#{@dir}/3_signalp_out.txt")
|
38
|
-
@expected_secretome = @test_input1.read("#{@dir}/4_secretome.fa")
|
39
|
-
@expected_output = @test_input1.read("#{@dir}/5_output.fa")
|
40
|
-
@motif = 'KK|KR|RR|R..R|R....R|R......R|' \
|
41
|
-
'H..R|H....R|H......R|K..R|K....R|K......R'
|
42
|
-
@motif_ar = %w(KR Kr kr)
|
43
|
-
@input_file_ar = ["#{@dir}/genetic.fa", "#{@dir}/protein.fa"]
|
44
|
-
@input_file_ar_neg = ["#{@dir}/empty_file.fa",
|
45
|
-
"#{@dir}/missing_input.fa", "#{@dir}/not_fasta.fa"]
|
46
|
-
@cut_off_ar = [622, 10, 2, 1]
|
47
|
-
@cut_off_ar_neg = [-10, 'hello', -5, 0]
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_motif
|
51
|
-
(0..2).each do |i|
|
52
|
-
test_run = @test_arg_vldr.arg(@motif_ar[i], @input_file_ar[0], @dir,
|
53
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
54
|
-
assert_equal('genetic', test_run)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_input_file
|
59
|
-
test_run = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
60
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
61
|
-
assert_equal('genetic', test_run)
|
62
|
-
test_run1 = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[1], @dir,
|
63
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
64
|
-
assert_equal('protein', test_run1)
|
65
|
-
(0..2).each do |i|
|
66
|
-
test_run2 = lambda do
|
67
|
-
@test_arg_vldr.arg(@motif_ar[0], @input_file_ar_neg[i], @dir,
|
68
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
69
|
-
end
|
70
|
-
assert_raise(ArgumentError) { test_run2 }
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_orf_min_length
|
75
|
-
(0..3).each do |i|
|
76
|
-
test_run = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
77
|
-
@cut_off_ar[i], FALSE, nil, 'help_banner')
|
78
|
-
assert_equal('genetic', test_run)
|
79
|
-
end
|
80
|
-
(0..3).each do |i|
|
81
|
-
test_run1 = lambda do
|
82
|
-
@test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
83
|
-
@cut_off_ar_neg[i], FALSE, nil, 'help_banner')
|
84
|
-
end
|
85
|
-
assert_raise(ArgumentError) { test_run1 }
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
# => Test if the output directory validator works, in ensuring whether an
|
90
|
-
# output directory can be found.
|
91
|
-
def test_output_dir_validator
|
92
|
-
assert_equal(nil, @test_vldr.output_dir(@dir))
|
93
|
-
end
|
94
|
-
|
95
|
-
# => Test if the translation method works properly - assert that the produced
|
96
|
-
# translation is equal to the expected translation hash
|
97
|
-
def test_translate
|
98
|
-
translation_hash_test = NpSearch::Translation.translate(@test_input_read)
|
99
|
-
assert_equal(@expected_translation, translation_hash_test)
|
100
|
-
end
|
101
|
-
|
102
|
-
# => Test if the extract_orf method works properly - assert that the produced
|
103
|
-
# orf hash is equal to the expected orf hash.
|
104
|
-
# => In orf method, an array is produced while in the expected file read, an
|
105
|
-
# array isn't produced, thus it s necessary to remove "[ ]" from both end.
|
106
|
-
def test_extract_orf
|
107
|
-
orf_hash_test = NpSearch::Translation.extract_orf(@expected_translation, 10)
|
108
|
-
assert_equal(@expected_orf.to_s, orf_hash_test.to_s.gsub('["', '"')
|
109
|
-
.gsub('"]', '"'))
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_parse
|
113
|
-
secretome = NpSearch::Analysis.parse("#{@dir}/3_signalp_out.txt",
|
114
|
-
@expected_orf, @motif)
|
115
|
-
assert_equal(@expected_secretome, secretome)
|
116
|
-
end
|
117
|
-
|
118
|
-
def test_flattener
|
119
|
-
flattened_output_test = NpSearch::Analysis.flattener(@expected_secretome)
|
120
|
-
assert_equal(@expected_output, flattened_output_test)
|
121
|
-
end
|
122
|
-
end
|