npsearch 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
data/test/files/4_secretome.fa
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
>isotig00009_f2_3~~~ - S.P.=> Cleavage Site: 22:23 | D-value: 0.532
|
2
|
-
MLKCFSIIMGLILLLEIGGGCA~~~IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLSCCGVNSYEDWDNNIYFNCSGPANNPEALWCAFLLLYTGSSKRSSQHPVRLWSSFPRTTKYFPHKDLHHWLCGYVYNVD
|
3
|
-
>isotig00009_f3_7~~~ - S.P.=> Cleavage Site: 16:17 | D-value: 0.643
|
4
|
-
MKTGIIIFISTVVVLP~~~ITLKPCGVPFSCCIPDQASGVANTQCGYGVRSPEQQNTFHTKIYTTGCADMFTMWINRYLYYIAGIAGVIVLVELFGFCFAHSLINDIKRQKARWAHR
|
5
|
-
>isotig00016_f1_0~~~ - S.P.=> Cleavage Site: 23:24 | D-value: 0.592
|
6
|
-
MNAGQIFIALMAQLFNACLLVSS~~~NFDSDIADSTLGKRSTGFVDTFGKRFVDSFGKRVDEFDYDHNGNYAEQSEQSSYISPQLKRGQKGLRSGSFIDAFGKRSSFQEVDEKRFADSFGKRFADSFGKRSPVGFVDTLGKRFAVSFGKRNTVGFVDTLGKRFADSFGKRSQQGFVDAFGKRYQGVY
|
data/test/files/5_output.fa
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
>isotig00009_f2_3~~~ - S.P.=> Cleavage Site: 22:23 | D-value: 0.532
|
2
|
-
MLKCFSIIMGLILLLEIGGGCA~~~IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLSCCGVNSYEDWDNNIYFNCSGPANNPEALWCAFLLLYTGSSKRSSQHPVRLWSSFPRTTKYFPHKDLHHWLCGYVYNVD
|
3
|
-
>isotig00009_f3_7~~~ - S.P.=> Cleavage Site: 16:17 | D-value: 0.643
|
4
|
-
MKTGIIIFISTVVVLP~~~ITLKPCGVPFSCCIPDQASGVANTQCGYGVRSPEQQNTFHTKIYTTGCADMFTMWINRYLYYIAGIAGVIVLVELFGFCFAHSLINDIKRQKARWAHR
|
5
|
-
>isotig00016_f1_0~~~ - S.P.=> Cleavage Site: 23:24 | D-value: 0.592
|
6
|
-
MNAGQIFIALMAQLFNACLLVSS~~~NFDSDIADSTLGKRSTGFVDTFGKRFVDSFGKRVDEFDYDHNGNYAEQSEQSSYISPQLKRGQKGLRSGSFIDAFGKRSSFQEVDEKRFADSFGKRFADSFGKRSPVGFVDTLGKRFAVSFGKRNTVGFVDTLGKRFADSFGKRSQQGFVDAFGKRYQGVY
|
data/test/files/5_output.html
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<title>Results</title>
|
5
|
-
<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
|
6
|
-
<style>
|
7
|
-
.id {font-weight: bold;}
|
8
|
-
.signalp {color:#007AC0; font-weight: bold;}
|
9
|
-
.motif {color:#00B050; font-weight: bold;}
|
10
|
-
.glycine {color:#FFC000; font-weight: bold;}
|
11
|
-
.phenylalanine {color:#FF00EB; font-weight: bold;}
|
12
|
-
.gkr {color:#FF0000; font-weight: bold;}
|
13
|
-
.cysteine {color:#00B050;}
|
14
|
-
p {word-wrap: break-word; font-family:Courier New, Courier, Mono;}
|
15
|
-
</style>
|
16
|
-
</head>
|
17
|
-
<body>
|
18
|
-
<p>
|
19
|
-
<span class='id'>isotig00009_f2_3</span>
|
20
|
-
<span>- S.P.=> Cleavage Site: 22:23 | D-value: 0.532</span>
|
21
|
-
<br>
|
22
|
-
<span class='signalp'>MLKCFSIIMGLILLLEIGGGCA</span><span>IYFYRAQIQAQFQKSLTDVTITDYRENADFQDLIDALQSGLS<span class="cysteine">C</span><span class="cysteine">C</span>GVNSYEDWDNNIYFN<span class="cysteine">C</span>SGPANNPEALW<span class="cysteine">C</span>AFLLLYTGSS<span class="motif">KR</span>SSQ<span class="motif">HPVR</span>LWSSFPRTTKYFPHKDLHHWL<span class="cysteine">C</span>GYVYNVD</span>
|
23
|
-
</p>
|
24
|
-
<p>
|
25
|
-
<span class='id'>isotig00009_f3_7</span>
|
26
|
-
<span>- S.P.=> Cleavage Site: 16:17 | D-value: 0.643</span>
|
27
|
-
<br>
|
28
|
-
<span class='signalp'>MKTGIIIFISTVVVLP</span><span>ITLKP<span class="cysteine">C</span>GVPFS<span class="cysteine">C</span><span class="cysteine">C</span>IPDQASGVANTQ<span class="cysteine">C</span>GYGVRSPEQQNTFHTKIYTTG<span class="cysteine">C</span>ADMFTMWINRYLYYIAGIAGVIVLVELFGF<span class="cysteine">C</span>FAHSLINDI<span class="motif">KR</span>QKARWAHR</span>
|
29
|
-
</p>
|
30
|
-
<p>
|
31
|
-
<span class='id'>isotig00016_f1_0</span>
|
32
|
-
<span>- S.P.=> Cleavage Site: 23:24 | D-value: 0.592</span>
|
33
|
-
<br>
|
34
|
-
<span class='signalp'>MNAGQIFIALMAQLFNACLLVSS</span><span>NFDSDIADSTL<span class="gkr">GKR</span>STGFVDTF<span class="gkr">GKR</span>FVDSF<span class="gkr">GKR</span>VDEFDYDHNGNYAEQSEQSSYISPQL<span class="motif">KR</span>GQ<span class="motif">KGLR</span>SGSFIDAF<span class="gkr">GKR</span>SSFQEVDE<span class="motif">KR</span>FADSF<span class="gkr">GKR</span>FADSF<span class="gkr">GKR</span>SPVGFVDTL<span class="gkr">GKR</span>FAVSF<span class="gkr">GKR</span>NTVGFVDTL<span class="gkr">GKR</span>FADSF<span class="gkr">GKR</span>SQQGFVDAF<span class="gkr">GKR</span>YQGVY</span>
|
35
|
-
</p>
|
36
|
-
</body>
|
37
|
-
</html>
|
data/test/test_np_search.rb
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'coveralls'
|
3
|
-
Coveralls.wear!
|
4
|
-
|
5
|
-
require 'bio'
|
6
|
-
require 'test/unit'
|
7
|
-
|
8
|
-
require 'npsearch'
|
9
|
-
require 'npsearch/arg_validator'
|
10
|
-
|
11
|
-
class InputChanged
|
12
|
-
# Changed to include the full entry definition rather than just the id. This
|
13
|
-
# was necessary for when testing individual parts since this is changed in
|
14
|
-
# pipeline
|
15
|
-
def read(input_file)
|
16
|
-
input_read = {}
|
17
|
-
biofastafile = Bio::FlatFile.open(Bio::FastaFormat, input_file)
|
18
|
-
biofastafile.each_entry do |entry|
|
19
|
-
# entry.definition used instead of entry.id
|
20
|
-
input_read[entry.definition] = entry.seq
|
21
|
-
end
|
22
|
-
input_read
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
class UnitTests < Test::Unit::TestCase
|
27
|
-
def setup # read all expected files
|
28
|
-
@dir = 'test/files'
|
29
|
-
@translation_test = NpSearch::Translation.new
|
30
|
-
@analysis_test = NpSearch::Analysis.new
|
31
|
-
@test_input1 = InputChanged.new
|
32
|
-
@test_arg_vldr = NpSearch::ArgValidators.new(:is_verbose)
|
33
|
-
@test_vldr = NpSearch::Validators.new
|
34
|
-
@test_input_read = NpSearch::Input.read("#{@dir}/genetic.fa")
|
35
|
-
@expected_translation = NpSearch::Input.read("#{@dir}/1_protein.fa")
|
36
|
-
@expected_orf = NpSearch::Input.read("#{@dir}/2_orf.fa")
|
37
|
-
@expected_sp_out_file = File.read("#{@dir}/3_signalp_out.txt")
|
38
|
-
@expected_secretome = @test_input1.read("#{@dir}/4_secretome.fa")
|
39
|
-
@expected_output = @test_input1.read("#{@dir}/5_output.fa")
|
40
|
-
@motif = 'KK|KR|RR|R..R|R....R|R......R|' \
|
41
|
-
'H..R|H....R|H......R|K..R|K....R|K......R'
|
42
|
-
@motif_ar = %w(KR Kr kr)
|
43
|
-
@input_file_ar = ["#{@dir}/genetic.fa", "#{@dir}/protein.fa"]
|
44
|
-
@input_file_ar_neg = ["#{@dir}/empty_file.fa",
|
45
|
-
"#{@dir}/missing_input.fa", "#{@dir}/not_fasta.fa"]
|
46
|
-
@cut_off_ar = [622, 10, 2, 1]
|
47
|
-
@cut_off_ar_neg = [-10, 'hello', -5, 0]
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_motif
|
51
|
-
(0..2).each do |i|
|
52
|
-
test_run = @test_arg_vldr.arg(@motif_ar[i], @input_file_ar[0], @dir,
|
53
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
54
|
-
assert_equal('genetic', test_run)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_input_file
|
59
|
-
test_run = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
60
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
61
|
-
assert_equal('genetic', test_run)
|
62
|
-
test_run1 = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[1], @dir,
|
63
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
64
|
-
assert_equal('protein', test_run1)
|
65
|
-
(0..2).each do |i|
|
66
|
-
test_run2 = lambda do
|
67
|
-
@test_arg_vldr.arg(@motif_ar[0], @input_file_ar_neg[i], @dir,
|
68
|
-
@cut_off_ar[0], FALSE, nil, 'help_banner')
|
69
|
-
end
|
70
|
-
assert_raise(ArgumentError) { test_run2 }
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_orf_min_length
|
75
|
-
(0..3).each do |i|
|
76
|
-
test_run = @test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
77
|
-
@cut_off_ar[i], FALSE, nil, 'help_banner')
|
78
|
-
assert_equal('genetic', test_run)
|
79
|
-
end
|
80
|
-
(0..3).each do |i|
|
81
|
-
test_run1 = lambda do
|
82
|
-
@test_arg_vldr.arg(@motif_ar[0], @input_file_ar[0], @dir,
|
83
|
-
@cut_off_ar_neg[i], FALSE, nil, 'help_banner')
|
84
|
-
end
|
85
|
-
assert_raise(ArgumentError) { test_run1 }
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
# => Test if the output directory validator works, in ensuring whether an
|
90
|
-
# output directory can be found.
|
91
|
-
def test_output_dir_validator
|
92
|
-
assert_equal(nil, @test_vldr.output_dir(@dir))
|
93
|
-
end
|
94
|
-
|
95
|
-
# => Test if the translation method works properly - assert that the produced
|
96
|
-
# translation is equal to the expected translation hash
|
97
|
-
def test_translate
|
98
|
-
translation_hash_test = NpSearch::Translation.translate(@test_input_read)
|
99
|
-
assert_equal(@expected_translation, translation_hash_test)
|
100
|
-
end
|
101
|
-
|
102
|
-
# => Test if the extract_orf method works properly - assert that the produced
|
103
|
-
# orf hash is equal to the expected orf hash.
|
104
|
-
# => In orf method, an array is produced while in the expected file read, an
|
105
|
-
# array isn't produced, thus it s necessary to remove "[ ]" from both end.
|
106
|
-
def test_extract_orf
|
107
|
-
orf_hash_test = NpSearch::Translation.extract_orf(@expected_translation, 10)
|
108
|
-
assert_equal(@expected_orf.to_s, orf_hash_test.to_s.gsub('["', '"')
|
109
|
-
.gsub('"]', '"'))
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_parse
|
113
|
-
secretome = NpSearch::Analysis.parse("#{@dir}/3_signalp_out.txt",
|
114
|
-
@expected_orf, @motif)
|
115
|
-
assert_equal(@expected_secretome, secretome)
|
116
|
-
end
|
117
|
-
|
118
|
-
def test_flattener
|
119
|
-
flattened_output_test = NpSearch::Analysis.flattener(@expected_secretome)
|
120
|
-
assert_equal(@expected_output, flattened_output_test)
|
121
|
-
end
|
122
|
-
end
|