npsearch 2.1.2 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/npsearch +6 -1
- data/exemplar_data/protein.fa +112 -0
- data/lib/npsearch.rb +34 -8
- data/lib/npsearch/arg_validator.rb +40 -16
- data/lib/npsearch/logger.rb +11 -0
- data/lib/npsearch/signalp.rb +8 -8
- data/lib/npsearch/version.rb +1 -1
- data/test/test_argument_validator.rb +3 -2
- data/test/test_sequence.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6357adc8080129fa2b76256669596bd7c0b956ba
|
4
|
+
data.tar.gz: a78e79d1462978dcb599d62d848a604aa86d883f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6557749bff5b29c99dc26edcf577df767f64382f6eb6cf8ab3f62c73b9ef5916fba201788efcb38914c9412971d42399825fb1b35c2be54e0b5e5e54c6891b3d
|
7
|
+
data.tar.gz: 96129c33bb5f7c21628d2578959283c72b4201d157992c0f2bf54f4881af6b8c6b85b309f86cab4a4307c249db4b3653973039c8273264540787e60740ec5a80
|
data/bin/npsearch
CHANGED
@@ -23,7 +23,7 @@ Banner
|
|
23
23
|
|
24
24
|
opt[:temp_dir] = File.join(Dir.pwd, '.temp',
|
25
25
|
Dir::Tmpname.make_tmpname('', nil))
|
26
|
-
opts.on('-
|
26
|
+
opts.on('-t', '--temp_dir path_to_temp_dir',
|
27
27
|
'The full path to the temp dir. NpSearch will create the folder and',
|
28
28
|
' then delete the folder once it has finished using them.',
|
29
29
|
' Default: Hidden folder in the current working directory') do |p|
|
@@ -36,6 +36,11 @@ Banner
|
|
36
36
|
opt[:num_threads] = n
|
37
37
|
end
|
38
38
|
|
39
|
+
opt[:debug] = false
|
40
|
+
opts.on('-d', '--debug', 'Run in debug mode') do
|
41
|
+
opt[:debug] = true
|
42
|
+
end
|
43
|
+
|
39
44
|
opt[:min_orf_length] = 30
|
40
45
|
opts.on('-l', '--min_orf_length N', Integer,
|
41
46
|
'The minimum length of a potential neuropeptide precursor.',
|
data/exemplar_data/protein.fa
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
>1108382 length 1440 cvg_15.4_tip_1
|
2
|
+
CTGGCGTAATTCACGAGCCTCGATGTGAGACGTCAGATGTTCAGGCTATGACAGGATGTGCATGTTATCATGCGGCCCAAATCTTACCTTTTCCTCTTCT
|
3
|
+
TCTTAGGCCTTTTTGCTGTGCTGGATTGATCGGTGCATTCTTTTCTCTTGTGAGTGAGTGAGCCACAGATGTGGCACCCCATAGACTGCCTATTGCATTG
|
4
|
+
ATGATCTGGTAACTCACATCGCTGGCAAACTGTACAGTGAACGTAACCTGGTTTGACGCATCTCTGACAATCCTTGCAATGCTTATACGTGCTGCCATCT
|
5
|
+
TTGGACGTGCAGCTGTTGCATTGAGTACAGTGTATATTCTCTTCCGCAACGTAGCGTTTACACGCTTTACAAAACCTGTAGCCTTCATCGAGAGGAAGAT
|
6
|
+
GGATTTCGGAGGGTTGAAGATTGGTGAAGACTCTGACAGGGGAGCCTTTCTTACGAGCCTTCTTCTGATTGCTCTGGAACAACGGGTGATTATCATAGTC
|
7
|
+
AACCTTATAGTCAAGCATTGTAAAAGAAGGAAAACTATCCAGAATTCTTTGTTCTAAGAAGTATGGGAAGATCCAAAGTAGTGGAAGCTTAGCTGTTGAG
|
8
|
+
CCAAGGTCTTGTAACCCCATTGTCCATCTTCCCATCATCCTCTGGATGACCTCAGCCAGGATCTCCACCCTGCCTCCAAAAGGAGGGTCAGTAACCATAA
|
9
|
+
CGATACCATCGCCATGGTTACGATGCAAAAAGTCCTCGCAGATAGTCCTGCTCTTCTCGCCGTCTAAAAAGTGATCGTTGAACATGTTGTAACGACAGAA
|
10
|
+
GAGGCTCGGAGGGTAGAAATGACCATAACTATGGTCTATATCCAACAACAGACTGCTGAGGTCTGATGATGCCTCAGACTGTATCATCTCATGTAATCTG
|
11
|
+
GGAGACCCAACACACAGCACTCTGTCATAACCGAGCCTCTTAATGGTGGTGATCAGAAACTCAGTGGCTGTTTTAGAGAACAGATATTGAGCGTTTGTTT
|
12
|
+
TGTTGTCCTCTAGCGGGAGGAGGAGATAGGATGGTTGACTCAGGATAGCGTCGGTGAGGTCAGCCTTTATCTTATGCTTCGAGTGAGTCTGGACTTCTTG
|
13
|
+
TGGACGCAGTAACAGGCTGCAGGATTGACAAAATTTCCTCTCAGATGGACTCAAATCCTTAGTGTCCTGCATTCTTTGGAAAAAATGTTCATGGCTGGTC
|
14
|
+
CACGGCTGCTGTGATCTGTTGAGTTCCTCTCTGCTCATCTTCTTAGCCTCAGAGACTTTCTCATCAGCGAGCTGGTAAAAGCTGCAATCTTTACGATCTC
|
15
|
+
TGCATGCCGAGCAGGCATAGTAGCGCTGTTTCTTGCGCTTCCCATTGAACCGCTCAAACAGTAACGTCGGACCATGTGGACAACAAGGTGAATTCTGCAC
|
16
|
+
CAGAGACACCTCTACGCCTAGAGTATTTGCCACACTAGCA
|
17
|
+
>1108384 length 1440 cvg_51.3_tip_1
|
18
|
+
AAAAAAAAGACAATTTGTGTTCTTAGTAATTTTTATGTTTTTTTTCTTCGGGTAATCATGGAATCAAACAAACTTTTATACCTCACCGTTGGAGGCTTCA
|
19
|
+
TGATGGTAGGGGGCACCACACATACCAGGTTATTTTCCAATCTTTAAAGAGTATCCACACTTTCCATCTTACATAAACTTTCCAATGGAACGTACCGGCA
|
20
|
+
GTCCAACAGAACAGCCCTTGTGCACGCGCTGGATACTTTAAAATCCAATCAGAACATTCCTACCTTTCTTACTTTTGAGTCGCCTCACTGGTAATAGCTA
|
21
|
+
CACAAAATTCAAATTTTACACAAGTGAATATTATGCGCATCAGTATTTGAAACTTTCCAGTGGAACGATTTGGCACTCCAACGAAATAGCTTGTGTGCAC
|
22
|
+
GAACTGGGATTCTTATCCAATCAAATGAGAACAATCCTACTATTCCTTCTTTTGAGGCGCTCCCCAATGTTAATAGTTACTAAAAATTCCACAAATGAAT
|
23
|
+
CGATCCATCGTGTCATCATTTCTGCCTCTCCTGACTGATGTCACTCTCGAGTTTTTCCACCTCTCGTTCGATCCCAGTCCGTATATTCTCTGGCAGTTTC
|
24
|
+
CACTGCTGTAGGAGGTCCATGCGTTTGTAGTCTGGTAGCCACCATGTTTTACCTTGTTCTACGAGTTCTTCTTTGACGTTGGAGAAATCTACACCCTGTA
|
25
|
+
GCTCCGCCCTCTCCCAGCGTTTGTACCACGGCCTCGGCTTCAGTTTGACCTTGATATCATTGATGGGTACTGGTTTGCCAGGGGGGTGTTTGATAGGCAC
|
26
|
+
CATGTTAGGATCGATGGTGCTGTATTCCGGCAGAGCATCCTGTAGATATTTCAAGTCGCTGTCTAACCTCTTCTCTAACTTCAACATCTCAACCTTCTGG
|
27
|
+
ATGAGTGGGTTGTACAACTCAAAACAAATTTCAACTCCTAAATCGTTAATGACGTTGCGAAGGATGAATGTAGCTCCCAGCCCATAGCCACCTCTTCTTG
|
28
|
+
TACAAATACCAACAAATCGGTTGGATTTACCCTTTGCATATATGTCCGCTGTTGTCACTGCCATTATGCTGCCAACATAGAATTCAGGGATATCGATGAC
|
29
|
+
TTTCCTTCTCTTGTAGCAATCCTGTCTCTCTAAGTAATCAAGGAGTTTTGACCGTGTTCCCATCGGTGGCAATAACTCTGGACTTGTAAAGCGCTCTCTT
|
30
|
+
ATCTCACCGTACCCACTGCCATCGTCGTCATGTGTAAACTCGTGCTTCTTATCGGCTGTAGTTTCTGCAATGGTGATGTCATCTACCGATGTGGCGAAAT
|
31
|
+
TGCATTTACCAGCACCTGCAACCGAGACATGTTTATGTAGAGAACCAGCTCTACCGGCCAATCTTAAACTCGATCCAGCCCGCAATAAACACGAAGTTCG
|
32
|
+
CACCAGAACACATGCAGACGTCGTGGCCGCCATGATGAAT
|
33
|
+
>1108386 length 1440 cvg_65.7_tip_0
|
34
|
+
TTTTTTGTTGCACTTTTGAATCAAGCCCCTTGCTTTCTTTGTCTCTTACTGCTGTTTTTATGTCCTCATCAAAAAATTAGCATTTTAAAATAAACCCACC
|
35
|
+
ACCAGTAAATACTCTACCTGCTGAAAATTACACTCATAACAACAAACACAAACACAAGTTTTGGTCAGTTAAAGATGAGATTTAAGTATCATGCAGTAAA
|
36
|
+
GGATATGTTGTCCAGTCTGTTCAATCTTGTTTCCTCAGCTTTTTTTTTTAAAGAAGAAATTTAGAACTTTCACCGATACTTGATTCTTAAAGATATATAA
|
37
|
+
ACACACTCATGATCATCATGTGCGATTACTATAGAGTGACACAAGTAGGATACCACTACACACTTTACTTTATCAAGTAATAAACCACAAGGCCGGCTGG
|
38
|
+
GTAACTTTTGATTCATTTTGAGTTGAACGTTTCATTTTAAGCTGACATTAGATAAACGTGTTGTTGCATTTAACTTCTCGGCTAAAATGAACTTGTTCCC
|
39
|
+
TTTATTCCCTCAAATTCTTTTCAAACCGTAAATTTTAATTATATTCATTGTAAATATCTTGAGTATTTATTTCTATGGTTTATATCTTACTCTTCTATTA
|
40
|
+
ATAAGCGCTTTTAGTTGTGAGTATTAGAGTTATTAATAAAATGTTTTGCTCGTTATGATCCCCCAAAGGGGCTGTTCCACACATAACTGTTTTTGAATCT
|
41
|
+
TCAACAAATCTCATCAAATATGCTGGGATGAAACTAAAATAGACAATGAGAAGAGGGTGTGGAACAGTTTTAACTTGCGAGGGCCACGCTGATCTATTGC
|
42
|
+
TCATATGGTGGTGGCTTGCTATCAGGAGACTGGATAGCATCACTGTAGGTTGGCAGCTTGGGGAGCAGAGCCTCAGCATCCATGGCATAATTGCCAGATT
|
43
|
+
GGGCATTGTCTCCCATCTCACGGGCTGCGATGTACTTGTAGCATGACCACACACAGGCAATGCAGTAGGCCTTGACTACCATGATCATCATGAAGATGAC
|
44
|
+
CAGGGCAACGATGACAAACCACTGGTTGTCCAGATCCATCTTGTTGATGGGCAGCTTCTCCTTCCAGTGCATGAACTCCTGGTTGTACGGGAACTTGCCA
|
45
|
+
CTGTCTTCGTAGTCAAATTCTGGGTAGTATGACATTAGGCCGATAGCTGTCAGGCATGTGATGAAGAAATCAAACAGCTGCAGGCAGAAGAACGGCAGCA
|
46
|
+
GGTAACTTGACTGGTGCTGTGCAACTCCCTTAATCATCATGCTTGTGATCAGAAAGTAGCAACAGATGATGACCAGTCCAACACAGTAATCAGATGCATC
|
47
|
+
ATGCTGGTCCTCAATTGACAAACTGGTGGACGTGCTGTCCTCAGTCTTGACAGTGCCCTTGATGATCATAGATGCAATAAGCAGCATAGCACATACTTGC
|
48
|
+
CCAACCATGTGCCATAACCCGATCAAAACCGAGCCAGTTC
|
49
|
+
>1108388 length 1440 cvg_25.5_tip_1
|
50
|
+
CAGCTTATGTAGTTATCCATTGTATCCCACTGGCAGCATTGGAAACACAAAGACCATTAGAGCAGAAAGCAATGGTAATGTGTTCTCATTGGGTACTTTC
|
51
|
+
GCTCAGTAACTGGTGATTAGTGGTCAACCATAGCATGATCAAGGCCAACATTGTCATTTTGTGGATTGCTGTTTCGTTTACTGAATACTTCTAGGGTCTT
|
52
|
+
ATTAACTGAAGATACACCGCACGCCAGGTCAGCAGCCTGAGAGCTTAGACCGAAAGCAAGAGTGCTGGCTTCGCACACGCTCGTTTCTTTGATAGCTAAG
|
53
|
+
GAGTAAACACTGTCATCATCTGAGACCATGGAAGTTGCAGCACTGGGTCCTTTGGAATACTACATCATGGGATTGATACTCACTGTGGAAGCTATACTTG
|
54
|
+
GAACAATATGCTGTGTTCGTCTTCTCCTGGTGTATTTGAAAAATCCAACCCTCCACCAACCGCAGAGTCTTCTAGGCATCACACTGTGCATTGGAGACTT
|
55
|
+
GGGAATAGCTCTGATGTGCCCATTTGCCGCCTTTGCAAGCTTCAGTGAAACCTGGCCATTTGGAGATGAGTACTGCCAGCTGTATGCCTTTGCTGGGATG
|
56
|
+
CTGTTTGGTACACTCAGCATATCAGCTATGGCATGCTTGGCTTTGGACAAATATTACTCAAGCTCAAACGATGCTAAAGGGGGTTCTAGTCAGCCTTACA
|
57
|
+
TCTTGATTACATCAATCATCTGGCTAAACGCCCTCTTCTGGTCACTAACCCCACTGAGTCCCATCGGTTGGGGGCGCTATGCCATCGAACCGCCTAAATC
|
58
|
+
GACGTGCATGTTGGACTTTGCAAACCGTGAGCCATCATACATGATGTACTTGTTCTTAATGACAAGCACGGTCTATGCGTTGCCAGTAGGCGCCATCTTA
|
59
|
+
TGGTGCTTAGTGAAGCTCAGAAAGGGAAAAGATCCAAACAACGGAAAGAGCAAGGTATGTCTTTTGGTGTTGTTCTCATTGATTGTGTACTGGGGAGCCT
|
60
|
+
ATGGAATCGTAGCACTATGGGCAGCACTAGATGACATACATAATGTCCCGCTCCGATTGGTTGCTGCTGCTCCAATCCTGGCCAAGATCTGTCCAATCGG
|
61
|
+
AAACACTGTAATGCAGGTGCTGACCAATAGGAACATCCGTTGTCTGATGTACAGAAAGGAAACAGTTGCATCCAACAAGAGGGAATGAGGAGCATAATGC
|
62
|
+
AGAAAATAATTTCGTGTTATGATAGGTCTAATAATGCCGGTGTTCTCCTTTTCTTTGCACACGCTAAAGGCCATATTTTGTTACACAGCATTGTTTTTAT
|
63
|
+
TTCATATCGTTATTGTAGACTTGTATTGCCATTTAAGTTGCAATGTCAGATTTCCTTAGATTTTTTTTTTATCTGAACTAAGTTTGACAGATGGTGCTTT
|
64
|
+
TTTAAAACGAAAAAAAACCACAAATTTCCTTAAACCATGG
|
65
|
+
>1108390 length 1440 cvg_34.1_tip_1
|
66
|
+
GCAACCTTTTGGGTACAGAATGCGAGAAAATCTGGGCAGATCGGTCACAGTATTTTAAGGATTTCTAGGTTAACTTTGAATCAAATAAATTCAAACTCTA
|
67
|
+
GGAGGAAGATTATCACATCATGCTGCTGTCGTGGGATTCATCACAAACCGGTATTTGTTTCATTACTGAAAGACAAACTTAAAGGAAGTCTTAATAGGAG
|
68
|
+
CCTGGGTCCATGTTACTCCACAAATTCACATTTGTTTCCTAAAGAAGTGACTAGTCCGGAGAAACTCACGCAAGATGAAATAACGGCCCTGCACCAAAAT
|
69
|
+
ATTTGTAAGGAATTAATTACAGATGAGAGATCTTTACAAGAGGTTGCACAGTACTATTTTGATGGAAAAGGCAAAGCGTTTAGACCCATGTTGGTTTTAC
|
70
|
+
TCACGGCTGGTGCATGCAATACTCATACTCAGGGTTCTAATAGTAAACTGGTGGATACTCAGAGACGGATTGCAATGATAGCAGAGATGATCCACACAGC
|
71
|
+
GAGTCTCATGCATGATGATGTCATCGATAATGCAGATACAAGGAGGAACAAAACAGCAATCAATGAAATGTGGGGACAGAGAAAGGCAATTTTAGCTGGA
|
72
|
+
GACTTTGTTCTATCAGTTTCATCTCAGGTTTTAGCAAGAATAGGAAACGAGGAAGTTGTTTTAATTCTTTCCCAAGTCATAGAAGATCTTGTTAGAGGGG
|
73
|
+
AATTTATGCAACTTGGTTCTAAGGAGGATGAAAATGAGCGCTTTGCCCACTACCTCAAGAAGACGTTCAAGAAAACAGCAAGTCTCATGGCTTACAGCTG
|
74
|
+
TCAAGCAGTAGCAATTCTGGGTGGTTGCAGTGCTGAGGTGTGTCAGATAGCCTACGAGTACGGCCGGAATACAGGCATGGCATTCCAGCTGATAGATGAT
|
75
|
+
GTATTAGACTTTGTATCCAGTGACGCCGCTATGGGGAAACCGACGGCTGCTGACTTGAAGCTGGGCCTTGCAACCGGGCCAGTGCTGTTTGCTGCCGAGA
|
76
|
+
AGTTTCCAGAGTTGGATGCCATGATCATGAGACGGTTTAGCGAGACTGGTGATGTAATGGCTGCAAGGGAAGCTGTTGCCAAGACTGAAGGCATCGAGCA
|
77
|
+
GACAAGACACATCGCTAACCAGCACAGCTTAGAAGCTCAGAAACAGATCAGTAAACTACATCCCAGTCCAGAGAGGCAAGCACTTATAGAACTTGCACAG
|
78
|
+
CGAGTTGTTACAAGAATAAAGTAAATCCGTAGTTAAAATCCTTGTGTCAGTGCAAGAATGTTGGATCTCTAAAAACAGCCTTTATGCAATACAGGTTGTG
|
79
|
+
AAAAATGCACAATGTTGTCTTTCACCATAACTACCCCTGATGTGCACTAGTGTACACAGAAACCCGGTTGCGTCAAAATGAAATGGTGTGAGACAAGACG
|
80
|
+
TCCTAATGAAATAGAGGAAACACGTTTTGCATAGATTTCT
|
81
|
+
>1108392 length 1440 cvg_27.6_tip_1
|
82
|
+
ATTTGATTTGATTTGAAATGGTTAATTAAAGTAAAAAACTGTTTCACAGCCAGTAGGCTGAATTACATGATTTGTATTAGTTAAAAAATATTCAAATGTA
|
83
|
+
TTATAGAAATTACAATAAAAACAAACAAATAATGCATAGGCAGTGGAACAGAAAAATGAACGTGGGGATCCCCAAGGAAAACAAGATTTTTACAAGAGCA
|
84
|
+
CTTAAAATAAAACAAGGTAAAATAAGGGCACACCGCTGTGAACGACCCAAAACAATGCAATTAAGTAGATTTAATAATAATAAAAAAAAAACAATAAAAA
|
85
|
+
TTTTAATAATACGCGTGAGATAAGGAACTTGACTGTTTTAAACCATTCAGCTCTACAGCGATGTATAGTATGCTTTGTTGAACTCCGAGTTTGGTGAATT
|
86
|
+
CTTACGTTTCTTCGGTGTAGCAAATGACGGAGGCTTGTTAGTGAATTTTGCATGGATTATGCAAAAGAAAGGCACGATTGAAAGCACCTGCTCTCCCAGG
|
87
|
+
TAGGGAGCTGACAAGACAGTGCTTGAGCGCTTGTTCATATTGACTATAGTTGGTATCAAGTATAATCCTGCCTGCTCGCTTCTGTGTCCACTCATGGCTT
|
88
|
+
ATACACTGCTCCTTTGTTAGACCTCAAGATCAGACAACAGTCAGCATACTCCAGGACTAGCCTGACAATCGTCATGTAGTAAGTCAGACGGTCGCCTCGT
|
89
|
+
GGAAAAGTTGTCTAAATTGAGAGAATGAAGACATATTGATGACTTTTGGAAGGTTTAATGATGGACGCAATATGCGCATTCAGTTCCTAGCAATTTAACG
|
90
|
+
GTGTTATGTTGAATGATGACCTCTTTTTTCCCAAAGGAAACGTTCATTGTTGCGCACTTGTTCAGGTTTAGTTGTATTTGTTCCTGTCTGACCAATCATA
|
91
|
+
GGGTAACCAGATAGTATCAGGCAACATTGAATGAGCTGATGCATGGCGAGTCTCTACCACTGTTGGGTTGTTCACGTGTCTGAGAACGTGAAGAGCATTA
|
92
|
+
GCTGCATCACCAGTGGGCAGCATCATGTTGGTGTCTTTACCACATAGAGCATCTTTGATGAGCAACAGAAAAAGTATAGGTCCCAATTTAGTTCCTTGGG
|
93
|
+
GAACACCGGCTGTTGTTGACTGCCACACAGACTTCACACTTTTTGTACTTCACCCATTGTCTGCAATCTGTTAAAAAGTTTGCGAATTATGTTACCAGAC
|
94
|
+
TGACGTCAAGTTTGACCAACTTGGATATTGCGATTTCGTGGTCAATGCCATTAAATGCTTTGATGAAGTCAGTAAGGAGGACCGTCGACTCGTATGACAT
|
95
|
+
GAAAGAAACTCGAGGGGTCTCGCGAAACATTTACAATTTAAAACAGGATCATTAAGAATTAATTAAAGGAAGATCAGCGTATCGACTCTGACAAATACCA
|
96
|
+
GCAGGTTCAGGTTAAGGTTCAGGTTTATTTTCCATACCAT
|
97
|
+
>1108394 length 1440 cvg_66.6_tip_0
|
98
|
+
AGATAACGACGGGAAAAAGTCAAGATAATTTCACGTACACAACTCTTCAGAAGAAGAACGGAAGCAAGAAAGGATAAGATGATGGTGCGATTCGTAGCCT
|
99
|
+
TACTCGGGGCAGTCAGCCTACTGGTATGTCAATCTGCAGGACTTGATGCCGCAGACGTCGAGGAACAAGACGAGTTCAACAAACCCTATGCTCCTGACAG
|
100
|
+
TTCGTATGCGGATTTAAATGCACTTTTGGGCAACAATGTGCCAAGTCTACACAGCGCCTCCAAGCGTCAACAAAGTGACAGGGAGCGTGAGGTTGAAGCA
|
101
|
+
GCCCAGACGCAATTTTACCCGTATGGAAGAAGAACTGATCCCAGGAAAGCGTCTGGTGGATTCACCTTTGGCAAGAGAGGGCAGTATTTTATCCCCATTC
|
102
|
+
CGTACGAGAAACGAGAGATGGATGAGGTGAACCCGTACAGCGTAGCTAAGCGCGACGACGAGCTGACCGGACTAGAGGAGTACCAAGCTAGCAAGAGGTC
|
103
|
+
AGGTCCTTATTCCTTTAACAGCGGGCTGACCTTTGGCAAGAGGGAACCCGAGAAGAGGAACATATTCGGATCTTATGACTTCGGGAAGCGGGCTTACGGC
|
104
|
+
AACAATTTCAGCTTCGGCAAGCGAGGCATGGGAGTGTCCAGTTTTAGCTTTGGCAAACGATCCGGACTTGAGGGTGAACAAATGATGCCGGAAGACAAAC
|
105
|
+
GGGCGTTCGGAGACTTTTCCTTCGGCAAGCGCAATAATGGTCTGTCCAGCTTCACATTCGGCAAGCGAGAGGGTGAACGATAGAACACGAGAGGGCGCCA
|
106
|
+
TACTGTCTACAATGTGATAATTATAGTATCTTAATTATTTCAAAACCATACTTGATAAGAAATAACTGCTTGCGTTTTGAGTTAAAACATCAGTTCCAAG
|
107
|
+
TATACAAACAAATTTTAAATAACATTGTTTAAAGCACGGATTTTGAATTAAAATGAAACGATAACGCAAAAGTGTGTAAATATAAGCATAGTAAGCTATC
|
108
|
+
AACATCTAGAATTATGGAAGATATTTCAAGATACTAAAACTTGGAAGCTTTAGTTAATTTTAGATCTGGTTGAGATTCAAGTCTTCTTTTCTAGATTTTG
|
109
|
+
TTTCCAGCCAGGGCTTTTACTATCCTTAACGTTTGGGGAAATTGCCAGGGTTGTGGTCACCTTGTTAAGGTCCTAAACAACACATTTTTTGTTAGTAAAA
|
110
|
+
AATTACCCAATTAAGACAATTCATGAGTAAGCGACAAAAACTTAATAATTTAGCATATTTGCTTACGTTAGTTACTACAACAATAATTGAAACTTTCTTA
|
111
|
+
AACACGGTTTTTCTAGTATGCTTTCCTGGTCAGCAATTAACTTGTAGATGTTTTTCAAATACTATGAATTCATATCTACGTAGAAATCTTAAAGATGTTT
|
112
|
+
TCTGTGATTAAGTTTAGAACAACCTTAGCCAACAATCTCC
|
data/lib/npsearch.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'bio'
|
2
|
+
require 'english'
|
2
3
|
require 'fileutils'
|
3
4
|
|
4
5
|
require 'npsearch/arg_validator'
|
6
|
+
require 'npsearch/logger'
|
5
7
|
require 'npsearch/output'
|
6
8
|
require 'npsearch/pool'
|
7
9
|
require 'npsearch/scoresequence'
|
@@ -11,21 +13,23 @@ require 'npsearch/signalp'
|
|
11
13
|
# Top level module / namespace.
|
12
14
|
module NpSearch
|
13
15
|
class <<self
|
16
|
+
attr_accessor :logger
|
14
17
|
attr_accessor :opt
|
15
18
|
attr_accessor :sequences
|
16
19
|
attr_reader :sorted_sequences
|
17
20
|
|
18
21
|
def init(opt)
|
19
|
-
@opt
|
22
|
+
@opt = opt
|
23
|
+
ArgumentsValidators.run(opt)
|
20
24
|
@sequences = []
|
21
25
|
@sorted_sequences = nil
|
22
|
-
@pool =
|
23
|
-
|
24
|
-
extract_orf
|
26
|
+
@pool = initialise_thread_pool
|
27
|
+
create_temp_directory
|
28
|
+
extract_orf
|
25
29
|
end
|
26
30
|
|
27
31
|
def run
|
28
|
-
input_file = @opt[:type] == :
|
32
|
+
input_file = @opt[:type] == :genetic ? @opt[:orf] : @opt[:input_file]
|
29
33
|
iterate_input_file(input_file)
|
30
34
|
@sorted_sequences = @sequences.sort_by(&:score).reverse
|
31
35
|
Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
|
@@ -33,16 +37,37 @@ module NpSearch
|
|
33
37
|
remove_temp_dir
|
34
38
|
end
|
35
39
|
|
40
|
+
def logger
|
41
|
+
@logger ||= Logger.new(STDOUT, @opt[:debug])
|
42
|
+
end
|
43
|
+
|
36
44
|
private
|
37
45
|
|
46
|
+
def initialise_thread_pool
|
47
|
+
return if @opt[:num_threads] == 1
|
48
|
+
logger.debug "Creating a thread pool of size #{@opt[:num_threads]}"
|
49
|
+
Pool.new(@opt[:num_threads])
|
50
|
+
end
|
51
|
+
|
52
|
+
def create_temp_directory
|
53
|
+
FileUtils.mkdir_p(@opt[:temp_dir])
|
54
|
+
logger.debug "Successfully creating temp directory at: #{@opt[:temp_dir]}"
|
55
|
+
end
|
56
|
+
|
38
57
|
# Uses getorf from EMBOSS package to extract all ORF
|
39
58
|
def extract_orf(input = @opt[:input_file], minsize = 90)
|
59
|
+
return if @opt[:type] == :protein
|
60
|
+
logger.debug 'Attempting to extract ORF.'
|
40
61
|
@opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
|
41
|
-
|
42
|
-
|
62
|
+
cmd = "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
|
63
|
+
" -minsize #{minsize} >/dev/null 2>&1"
|
64
|
+
logger.debug "Running: #{cmd}"
|
65
|
+
system(cmd)
|
66
|
+
logger.debug("EGexit Code: #{$CHILD_STATUS.exitstatus}")
|
43
67
|
end
|
44
68
|
|
45
69
|
def iterate_input_file(input_file)
|
70
|
+
logger.debug "Iterating the Input File: #{input_file}"
|
46
71
|
Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
|
47
72
|
if @opt[:num_threads] > 1
|
48
73
|
@pool.schedule(entry) { |e| initialise_seqs(e) }
|
@@ -54,10 +79,10 @@ module NpSearch
|
|
54
79
|
end
|
55
80
|
|
56
81
|
def initialise_seqs(entry)
|
82
|
+
logger.debug "-- Analysing: '#{entry.definition}' (#{entry.aaseq.length})"
|
57
83
|
return if entry.aaseq.length > @opt[:max_orf_length]
|
58
84
|
sp = Signalp.analyse_sequence(entry.aaseq.to_s)
|
59
85
|
return if sp[:sp] == 'N'
|
60
|
-
# seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
|
61
86
|
seq = Sequence.new(entry, sp)
|
62
87
|
ScoreSequence.run(seq, @opt)
|
63
88
|
@sequences << seq
|
@@ -65,6 +90,7 @@ module NpSearch
|
|
65
90
|
|
66
91
|
def remove_temp_dir
|
67
92
|
return unless File.directory?(@opt[:temp_dir])
|
93
|
+
logger.debug "Deleting Temporary directory: #{@opt[:temp_dir]}"
|
68
94
|
FileUtils.rm_rf(@opt[:temp_dir])
|
69
95
|
end
|
70
96
|
end
|
data/lib/npsearch/arg_validator.rb
CHANGED
@@ -1,69 +1,86 @@
|
|
1
1
|
require 'bio'
|
2
|
+
require 'forwardable'
|
2
3
|
|
3
4
|
# Top level module / namespace.
|
4
5
|
module NpSearch
|
5
6
|
# A class that validates the command line opts
|
6
7
|
class ArgumentsValidators
|
7
8
|
class << self
|
9
|
+
extend Forwardable
|
10
|
+
def_delegators NpSearch, :logger
|
11
|
+
|
8
12
|
def run(opt)
|
9
13
|
assert_file_present('input fasta file', opt[:input_file])
|
10
14
|
opt[:input_file] = File.expand_path(opt[:input_file])
|
11
15
|
assert_input_file_not_empty(opt[:input_file])
|
12
16
|
assert_input_file_probably_fasta(opt[:input_file])
|
13
|
-
opt[:type]
|
17
|
+
opt[:type] = assert_input_sequence(opt[:input_file])
|
14
18
|
opt[:num_threads] = check_num_threads(opt[:num_threads])
|
15
19
|
assert_binaries('SignalP 4.1 Script', opt[:signalp_path])
|
20
|
+
logger.debug "The validated OPT hash contains: #{opt}"
|
16
21
|
opt
|
17
22
|
end
|
18
23
|
|
19
24
|
private
|
20
25
|
|
21
26
|
def assert_file_present(desc, file, exit_code = 1)
|
27
|
+
logger.debug "Testing if the #{desc} exists: '#{file}'."
|
22
28
|
return if file && File.exist?(File.expand_path(file))
|
23
|
-
|
29
|
+
error_msg = "*** Error: Couldn't find the #{desc}: '#{file}'."
|
30
|
+
logger.fatal error_msg
|
31
|
+
$stderr.puts error_msg
|
24
32
|
exit exit_code
|
25
33
|
end
|
26
34
|
|
27
35
|
def assert_input_file_not_empty(file)
|
36
|
+
logger.debug "Testing if the input file ('#{file}') is empty."
|
28
37
|
return unless File.zero?(File.expand_path(file))
|
29
|
-
|
30
|
-
|
38
|
+
error_msg = "*** Error: The input_file ('#{file}') seems to be empty."
|
39
|
+
logger.fatal error_msg
|
40
|
+
$stderr.puts error_msg
|
31
41
|
exit 1
|
32
42
|
end
|
33
43
|
|
34
44
|
def assert_input_file_probably_fasta(file)
|
45
|
+
logger.debug("Testing whether the input, ('#{file}') is a fasta file.")
|
35
46
|
File.open(file, 'r') do |f|
|
36
|
-
fasta =
|
47
|
+
fasta = f.readline[0] == '>' ? true : false
|
37
48
|
return fasta if fasta
|
38
49
|
end
|
39
|
-
|
40
|
-
'
|
50
|
+
error_msg = "*** Error: The input file (#{file}) does not seems to be" \
|
51
|
+
' to be a fasta file.'
|
52
|
+
logger.fatal error_msg
|
53
|
+
$stderr.puts error_msg
|
41
54
|
exit 1
|
42
55
|
end
|
43
56
|
|
44
57
|
def assert_input_sequence(file)
|
45
58
|
type = type_of_sequences(file)
|
46
59
|
return type unless type.nil?
|
47
|
-
|
48
|
-
|
49
|
-
|
60
|
+
error_msg = '*** Error: The input files seems to contain a mixture of' \
|
61
|
+
' both protein and nucleotide data.' \
|
62
|
+
' Please correct this and try again.'
|
63
|
+
logger.fatal error_msg
|
64
|
+
$stderr.puts error_msg
|
50
65
|
exit 1
|
51
66
|
end
|
52
67
|
|
53
68
|
# determine file sequence type based on first 500 lines
|
54
69
|
def type_of_sequences(file)
|
70
|
+
logger.debug 'Checking the type of sequence in the input file based' \
|
71
|
+
' on the first 500 lines.'
|
55
72
|
fasta_content = File.foreach(file).first(500).join("\n")
|
56
73
|
# the first sequence does not need to have a fasta definition line
|
57
74
|
sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
|
58
75
|
# get all sequence types
|
59
76
|
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
60
77
|
.uniq.compact
|
78
|
+
logger.debug " The guessed typed of Sequences are: #{sequence_types}"
|
61
79
|
return nil if sequence_types.empty?
|
62
80
|
sequence_types.first if sequence_types.length == 1
|
63
81
|
end
|
64
82
|
|
65
83
|
def guess_sequence_type(seq)
|
66
|
-
# removing non-letter and ambiguous characters
|
67
84
|
cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
|
68
85
|
return nil if cleaned_sequence.length < 10 # conservative
|
69
86
|
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
@@ -71,20 +88,27 @@ module NpSearch
|
|
71
88
|
end
|
72
89
|
|
73
90
|
def check_num_threads(num_threads)
|
91
|
+
logger.debug "Checking the number of threads: #{num_threads}"
|
74
92
|
num_threads = Integer(num_threads)
|
75
93
|
unless num_threads > 0
|
76
|
-
|
77
|
-
|
94
|
+
warn_msg = 'Number of threads can not be lower than 0. Changing' \
|
95
|
+
' number of threads to 1'
|
96
|
+
logger.warn warn_msg
|
97
|
+
$stderr.puts warn_msg
|
78
98
|
num_threads = 1
|
79
99
|
end
|
80
100
|
return num_threads unless num_threads > 256
|
81
|
-
|
82
|
-
|
101
|
+
warn_msg = "Number of threads set at #{num_threads} is unusually high."
|
102
|
+
logger.warn warn_msg
|
103
|
+
$stderr.puts warn_msg
|
83
104
|
end
|
84
105
|
|
85
106
|
def assert_binaries(desc, bin)
|
107
|
+
logger.debug "Checking #{desc} binary at: #{bin}."
|
86
108
|
return if command?(bin.to_s)
|
87
|
-
|
109
|
+
warn_msg = "NpSearch is unable to use the #{desc} at #{bin}"
|
110
|
+
logger.warn warn_msg
|
111
|
+
$stderr.puts warn_msg
|
88
112
|
end
|
89
113
|
|
90
114
|
# Return `true` if the given command exists and is executable.
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -8,15 +8,15 @@ module NpSearch
|
|
8
8
|
class Signalp
|
9
9
|
class << self
|
10
10
|
extend Forwardable
|
11
|
-
def_delegators NpSearch, :opt
|
11
|
+
def_delegators NpSearch, :opt, :logger
|
12
12
|
|
13
13
|
def analyse_sequence(seq)
|
14
14
|
sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
|
15
15
|
sp dmaxcut networks orf)
|
16
16
|
seqs = setup_analysis(seq)
|
17
17
|
sp_results = []
|
18
|
-
seqs.each do |
|
19
|
-
sp_results << run_signalp(
|
18
|
+
seqs.each do |sequence|
|
19
|
+
sp_results << run_signalp(sequence, sp_headers)
|
20
20
|
end
|
21
21
|
sp_results.sort_by { |h| h[:d] }.reverse[0]
|
22
22
|
end
|
@@ -24,10 +24,10 @@ module NpSearch
|
|
24
24
|
private
|
25
25
|
|
26
26
|
def run_signalp(seq, sp_headers)
|
27
|
-
|
27
|
+
timeout(300) do
|
28
28
|
cmd = "echo '>seq\n#{seq}\n' | #{opt[:signalp_path]} -t euk" \
|
29
|
-
|
30
|
-
stdin, stdout, stderr
|
29
|
+
' -f short -U 0.34 -u 0.34'
|
30
|
+
stdin, stdout, stderr = Open3.popen3(cmd)
|
31
31
|
out = stdout.gets(nil).split("\n").delete_if { |l| l[0] == '#' }
|
32
32
|
if out.nil? || out.empty?
|
33
33
|
print stdout
|
@@ -40,13 +40,13 @@ module NpSearch
|
|
40
40
|
stdin.close; stdout.close; stderr.close
|
41
41
|
end
|
42
42
|
rescue Timeout::Error
|
43
|
-
no_results = [0,0,1,1,1,1,1,1,1,'N',1,1, seq]
|
43
|
+
no_results = [0, 0, 1, 1, 1, 1, 1, 1, 1, 'N', 1, 1, seq]
|
44
44
|
return Hash[sp_headers.map(&:to_sym).zip(no_results)]
|
45
45
|
end
|
46
46
|
|
47
47
|
def setup_analysis(seq)
|
48
48
|
orfs = seq.scan(/(?=(M\w{#{opt[:min_orf_length]},}))./).flatten
|
49
|
-
|
49
|
+
opt[:type] == :protein || orfs.empty? || orfs.nil? ? [seq] : orfs
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/npsearch/version.rb
CHANGED
data/test/test_argument_validator.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require_relative 'test_helper'
|
2
|
-
|
2
|
+
require 'npsearch'
|
3
3
|
require 'npsearch/arg_validator'
|
4
4
|
|
5
5
|
# Class to test the how well the CLI arguments are validated.
|
6
6
|
class TestInputArgumentValidator < Minitest::Test
|
7
7
|
def setup
|
8
|
+
NpSearch.logger = Logger.new(STDOUT, true)
|
8
9
|
@c = NpSearch::ArgumentsValidators
|
9
|
-
@opt = { num_threads: 1, min_orf_length: 30 }
|
10
|
+
@opt = { num_threads: 1, min_orf_length: 30, debug: true }
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_assert_file_present
|
data/test/test_sequence.rb
CHANGED
@@ -17,7 +17,7 @@ class TestSequences < Minitest::Test
|
|
17
17
|
sp = { name: 'test_sequences', cmax: '0.492', cmax_pos: '31', ymax: '0.612',
|
18
18
|
ymax_pos: '31', smax: '0.950', smax_pos: '17', smean: '0.786',
|
19
19
|
d: '0.706', sp: 'Y', dmaxcut: '0.300', networks: 'SignalP-noTM',
|
20
|
-
orf: sequence.seq}
|
20
|
+
orf: sequence.seq }
|
21
21
|
@seq = NpSearch::Sequence.new(sequence, sp, 2)
|
22
22
|
end
|
23
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: npsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.2
|
4
|
+
version: 2.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Moghul et al.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,8 +115,10 @@ files:
|
|
115
115
|
- bin/npsearch
|
116
116
|
- exemplar_data/README.md
|
117
117
|
- exemplar_data/genetic_data.fa
|
118
|
+
- exemplar_data/protein.fa
|
118
119
|
- lib/npsearch.rb
|
119
120
|
- lib/npsearch/arg_validator.rb
|
121
|
+
- lib/npsearch/logger.rb
|
120
122
|
- lib/npsearch/output.rb
|
121
123
|
- lib/npsearch/pool.rb
|
122
124
|
- lib/npsearch/scoresequence.rb
|