npsearch 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/npsearch +6 -1
- data/exemplar_data/protein.fa +112 -0
- data/lib/npsearch.rb +34 -8
- data/lib/npsearch/arg_validator.rb +40 -16
- data/lib/npsearch/logger.rb +11 -0
- data/lib/npsearch/signalp.rb +8 -8
- data/lib/npsearch/version.rb +1 -1
- data/test/test_argument_validator.rb +3 -2
- data/test/test_sequence.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6357adc8080129fa2b76256669596bd7c0b956ba
|
4
|
+
data.tar.gz: a78e79d1462978dcb599d62d848a604aa86d883f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6557749bff5b29c99dc26edcf577df767f64382f6eb6cf8ab3f62c73b9ef5916fba201788efcb38914c9412971d42399825fb1b35c2be54e0b5e5e54c6891b3d
|
7
|
+
data.tar.gz: 96129c33bb5f7c21628d2578959283c72b4201d157992c0f2bf54f4881af6b8c6b85b309f86cab4a4307c249db4b3653973039c8273264540787e60740ec5a80
|
data/bin/npsearch
CHANGED
@@ -23,7 +23,7 @@ Banner
|
|
23
23
|
|
24
24
|
opt[:temp_dir] = File.join(Dir.pwd, '.temp',
|
25
25
|
Dir::Tmpname.make_tmpname('', nil))
|
26
|
-
opts.on('-
|
26
|
+
opts.on('-t', '--temp_dir path_to_temp_dir',
|
27
27
|
'The full path to the temp dir. NpSearch will create the folder and',
|
28
28
|
' then delete the folder once it has finished using them.',
|
29
29
|
' Default: Hidden folder in the current working directory') do |p|
|
@@ -36,6 +36,11 @@ Banner
|
|
36
36
|
opt[:num_threads] = n
|
37
37
|
end
|
38
38
|
|
39
|
+
opt[:debug] = false
|
40
|
+
opts.on('-d', '--debug', 'Run in debug mode') do
|
41
|
+
opt[:debug] = true
|
42
|
+
end
|
43
|
+
|
39
44
|
opt[:min_orf_length] = 30
|
40
45
|
opts.on('-l', '--min_orf_length N', Integer,
|
41
46
|
'The minimum length of a potential neuropeptide precursor.',
|
data/exemplar_data/protein.fa
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
>1108382 length 1440 cvg_15.4_tip_1
|
2
|
+
CTGGCGTAATTCACGAGCCTCGATGTGAGACGTCAGATGTTCAGGCTATGACAGGATGTGCATGTTATCATGCGGCCCAAATCTTACCTTTTCCTCTTCT
|
3
|
+
TCTTAGGCCTTTTTGCTGTGCTGGATTGATCGGTGCATTCTTTTCTCTTGTGAGTGAGTGAGCCACAGATGTGGCACCCCATAGACTGCCTATTGCATTG
|
4
|
+
ATGATCTGGTAACTCACATCGCTGGCAAACTGTACAGTGAACGTAACCTGGTTTGACGCATCTCTGACAATCCTTGCAATGCTTATACGTGCTGCCATCT
|
5
|
+
TTGGACGTGCAGCTGTTGCATTGAGTACAGTGTATATTCTCTTCCGCAACGTAGCGTTTACACGCTTTACAAAACCTGTAGCCTTCATCGAGAGGAAGAT
|
6
|
+
GGATTTCGGAGGGTTGAAGATTGGTGAAGACTCTGACAGGGGAGCCTTTCTTACGAGCCTTCTTCTGATTGCTCTGGAACAACGGGTGATTATCATAGTC
|
7
|
+
AACCTTATAGTCAAGCATTGTAAAAGAAGGAAAACTATCCAGAATTCTTTGTTCTAAGAAGTATGGGAAGATCCAAAGTAGTGGAAGCTTAGCTGTTGAG
|
8
|
+
CCAAGGTCTTGTAACCCCATTGTCCATCTTCCCATCATCCTCTGGATGACCTCAGCCAGGATCTCCACCCTGCCTCCAAAAGGAGGGTCAGTAACCATAA
|
9
|
+
CGATACCATCGCCATGGTTACGATGCAAAAAGTCCTCGCAGATAGTCCTGCTCTTCTCGCCGTCTAAAAAGTGATCGTTGAACATGTTGTAACGACAGAA
|
10
|
+
GAGGCTCGGAGGGTAGAAATGACCATAACTATGGTCTATATCCAACAACAGACTGCTGAGGTCTGATGATGCCTCAGACTGTATCATCTCATGTAATCTG
|
11
|
+
GGAGACCCAACACACAGCACTCTGTCATAACCGAGCCTCTTAATGGTGGTGATCAGAAACTCAGTGGCTGTTTTAGAGAACAGATATTGAGCGTTTGTTT
|
12
|
+
TGTTGTCCTCTAGCGGGAGGAGGAGATAGGATGGTTGACTCAGGATAGCGTCGGTGAGGTCAGCCTTTATCTTATGCTTCGAGTGAGTCTGGACTTCTTG
|
13
|
+
TGGACGCAGTAACAGGCTGCAGGATTGACAAAATTTCCTCTCAGATGGACTCAAATCCTTAGTGTCCTGCATTCTTTGGAAAAAATGTTCATGGCTGGTC
|
14
|
+
CACGGCTGCTGTGATCTGTTGAGTTCCTCTCTGCTCATCTTCTTAGCCTCAGAGACTTTCTCATCAGCGAGCTGGTAAAAGCTGCAATCTTTACGATCTC
|
15
|
+
TGCATGCCGAGCAGGCATAGTAGCGCTGTTTCTTGCGCTTCCCATTGAACCGCTCAAACAGTAACGTCGGACCATGTGGACAACAAGGTGAATTCTGCAC
|
16
|
+
CAGAGACACCTCTACGCCTAGAGTATTTGCCACACTAGCA
|
17
|
+
>1108384 length 1440 cvg_51.3_tip_1
|
18
|
+
AAAAAAAAGACAATTTGTGTTCTTAGTAATTTTTATGTTTTTTTTCTTCGGGTAATCATGGAATCAAACAAACTTTTATACCTCACCGTTGGAGGCTTCA
|
19
|
+
TGATGGTAGGGGGCACCACACATACCAGGTTATTTTCCAATCTTTAAAGAGTATCCACACTTTCCATCTTACATAAACTTTCCAATGGAACGTACCGGCA
|
20
|
+
GTCCAACAGAACAGCCCTTGTGCACGCGCTGGATACTTTAAAATCCAATCAGAACATTCCTACCTTTCTTACTTTTGAGTCGCCTCACTGGTAATAGCTA
|
21
|
+
CACAAAATTCAAATTTTACACAAGTGAATATTATGCGCATCAGTATTTGAAACTTTCCAGTGGAACGATTTGGCACTCCAACGAAATAGCTTGTGTGCAC
|
22
|
+
GAACTGGGATTCTTATCCAATCAAATGAGAACAATCCTACTATTCCTTCTTTTGAGGCGCTCCCCAATGTTAATAGTTACTAAAAATTCCACAAATGAAT
|
23
|
+
CGATCCATCGTGTCATCATTTCTGCCTCTCCTGACTGATGTCACTCTCGAGTTTTTCCACCTCTCGTTCGATCCCAGTCCGTATATTCTCTGGCAGTTTC
|
24
|
+
CACTGCTGTAGGAGGTCCATGCGTTTGTAGTCTGGTAGCCACCATGTTTTACCTTGTTCTACGAGTTCTTCTTTGACGTTGGAGAAATCTACACCCTGTA
|
25
|
+
GCTCCGCCCTCTCCCAGCGTTTGTACCACGGCCTCGGCTTCAGTTTGACCTTGATATCATTGATGGGTACTGGTTTGCCAGGGGGGTGTTTGATAGGCAC
|
26
|
+
CATGTTAGGATCGATGGTGCTGTATTCCGGCAGAGCATCCTGTAGATATTTCAAGTCGCTGTCTAACCTCTTCTCTAACTTCAACATCTCAACCTTCTGG
|
27
|
+
ATGAGTGGGTTGTACAACTCAAAACAAATTTCAACTCCTAAATCGTTAATGACGTTGCGAAGGATGAATGTAGCTCCCAGCCCATAGCCACCTCTTCTTG
|
28
|
+
TACAAATACCAACAAATCGGTTGGATTTACCCTTTGCATATATGTCCGCTGTTGTCACTGCCATTATGCTGCCAACATAGAATTCAGGGATATCGATGAC
|
29
|
+
TTTCCTTCTCTTGTAGCAATCCTGTCTCTCTAAGTAATCAAGGAGTTTTGACCGTGTTCCCATCGGTGGCAATAACTCTGGACTTGTAAAGCGCTCTCTT
|
30
|
+
ATCTCACCGTACCCACTGCCATCGTCGTCATGTGTAAACTCGTGCTTCTTATCGGCTGTAGTTTCTGCAATGGTGATGTCATCTACCGATGTGGCGAAAT
|
31
|
+
TGCATTTACCAGCACCTGCAACCGAGACATGTTTATGTAGAGAACCAGCTCTACCGGCCAATCTTAAACTCGATCCAGCCCGCAATAAACACGAAGTTCG
|
32
|
+
CACCAGAACACATGCAGACGTCGTGGCCGCCATGATGAAT
|
33
|
+
>1108386 length 1440 cvg_65.7_tip_0
|
34
|
+
TTTTTTGTTGCACTTTTGAATCAAGCCCCTTGCTTTCTTTGTCTCTTACTGCTGTTTTTATGTCCTCATCAAAAAATTAGCATTTTAAAATAAACCCACC
|
35
|
+
ACCAGTAAATACTCTACCTGCTGAAAATTACACTCATAACAACAAACACAAACACAAGTTTTGGTCAGTTAAAGATGAGATTTAAGTATCATGCAGTAAA
|
36
|
+
GGATATGTTGTCCAGTCTGTTCAATCTTGTTTCCTCAGCTTTTTTTTTTAAAGAAGAAATTTAGAACTTTCACCGATACTTGATTCTTAAAGATATATAA
|
37
|
+
ACACACTCATGATCATCATGTGCGATTACTATAGAGTGACACAAGTAGGATACCACTACACACTTTACTTTATCAAGTAATAAACCACAAGGCCGGCTGG
|
38
|
+
GTAACTTTTGATTCATTTTGAGTTGAACGTTTCATTTTAAGCTGACATTAGATAAACGTGTTGTTGCATTTAACTTCTCGGCTAAAATGAACTTGTTCCC
|
39
|
+
TTTATTCCCTCAAATTCTTTTCAAACCGTAAATTTTAATTATATTCATTGTAAATATCTTGAGTATTTATTTCTATGGTTTATATCTTACTCTTCTATTA
|
40
|
+
ATAAGCGCTTTTAGTTGTGAGTATTAGAGTTATTAATAAAATGTTTTGCTCGTTATGATCCCCCAAAGGGGCTGTTCCACACATAACTGTTTTTGAATCT
|
41
|
+
TCAACAAATCTCATCAAATATGCTGGGATGAAACTAAAATAGACAATGAGAAGAGGGTGTGGAACAGTTTTAACTTGCGAGGGCCACGCTGATCTATTGC
|
42
|
+
TCATATGGTGGTGGCTTGCTATCAGGAGACTGGATAGCATCACTGTAGGTTGGCAGCTTGGGGAGCAGAGCCTCAGCATCCATGGCATAATTGCCAGATT
|
43
|
+
GGGCATTGTCTCCCATCTCACGGGCTGCGATGTACTTGTAGCATGACCACACACAGGCAATGCAGTAGGCCTTGACTACCATGATCATCATGAAGATGAC
|
44
|
+
CAGGGCAACGATGACAAACCACTGGTTGTCCAGATCCATCTTGTTGATGGGCAGCTTCTCCTTCCAGTGCATGAACTCCTGGTTGTACGGGAACTTGCCA
|
45
|
+
CTGTCTTCGTAGTCAAATTCTGGGTAGTATGACATTAGGCCGATAGCTGTCAGGCATGTGATGAAGAAATCAAACAGCTGCAGGCAGAAGAACGGCAGCA
|
46
|
+
GGTAACTTGACTGGTGCTGTGCAACTCCCTTAATCATCATGCTTGTGATCAGAAAGTAGCAACAGATGATGACCAGTCCAACACAGTAATCAGATGCATC
|
47
|
+
ATGCTGGTCCTCAATTGACAAACTGGTGGACGTGCTGTCCTCAGTCTTGACAGTGCCCTTGATGATCATAGATGCAATAAGCAGCATAGCACATACTTGC
|
48
|
+
CCAACCATGTGCCATAACCCGATCAAAACCGAGCCAGTTC
|
49
|
+
>1108388 length 1440 cvg_25.5_tip_1
|
50
|
+
CAGCTTATGTAGTTATCCATTGTATCCCACTGGCAGCATTGGAAACACAAAGACCATTAGAGCAGAAAGCAATGGTAATGTGTTCTCATTGGGTACTTTC
|
51
|
+
GCTCAGTAACTGGTGATTAGTGGTCAACCATAGCATGATCAAGGCCAACATTGTCATTTTGTGGATTGCTGTTTCGTTTACTGAATACTTCTAGGGTCTT
|
52
|
+
ATTAACTGAAGATACACCGCACGCCAGGTCAGCAGCCTGAGAGCTTAGACCGAAAGCAAGAGTGCTGGCTTCGCACACGCTCGTTTCTTTGATAGCTAAG
|
53
|
+
GAGTAAACACTGTCATCATCTGAGACCATGGAAGTTGCAGCACTGGGTCCTTTGGAATACTACATCATGGGATTGATACTCACTGTGGAAGCTATACTTG
|
54
|
+
GAACAATATGCTGTGTTCGTCTTCTCCTGGTGTATTTGAAAAATCCAACCCTCCACCAACCGCAGAGTCTTCTAGGCATCACACTGTGCATTGGAGACTT
|
55
|
+
GGGAATAGCTCTGATGTGCCCATTTGCCGCCTTTGCAAGCTTCAGTGAAACCTGGCCATTTGGAGATGAGTACTGCCAGCTGTATGCCTTTGCTGGGATG
|
56
|
+
CTGTTTGGTACACTCAGCATATCAGCTATGGCATGCTTGGCTTTGGACAAATATTACTCAAGCTCAAACGATGCTAAAGGGGGTTCTAGTCAGCCTTACA
|
57
|
+
TCTTGATTACATCAATCATCTGGCTAAACGCCCTCTTCTGGTCACTAACCCCACTGAGTCCCATCGGTTGGGGGCGCTATGCCATCGAACCGCCTAAATC
|
58
|
+
GACGTGCATGTTGGACTTTGCAAACCGTGAGCCATCATACATGATGTACTTGTTCTTAATGACAAGCACGGTCTATGCGTTGCCAGTAGGCGCCATCTTA
|
59
|
+
TGGTGCTTAGTGAAGCTCAGAAAGGGAAAAGATCCAAACAACGGAAAGAGCAAGGTATGTCTTTTGGTGTTGTTCTCATTGATTGTGTACTGGGGAGCCT
|
60
|
+
ATGGAATCGTAGCACTATGGGCAGCACTAGATGACATACATAATGTCCCGCTCCGATTGGTTGCTGCTGCTCCAATCCTGGCCAAGATCTGTCCAATCGG
|
61
|
+
AAACACTGTAATGCAGGTGCTGACCAATAGGAACATCCGTTGTCTGATGTACAGAAAGGAAACAGTTGCATCCAACAAGAGGGAATGAGGAGCATAATGC
|
62
|
+
AGAAAATAATTTCGTGTTATGATAGGTCTAATAATGCCGGTGTTCTCCTTTTCTTTGCACACGCTAAAGGCCATATTTTGTTACACAGCATTGTTTTTAT
|
63
|
+
TTCATATCGTTATTGTAGACTTGTATTGCCATTTAAGTTGCAATGTCAGATTTCCTTAGATTTTTTTTTTATCTGAACTAAGTTTGACAGATGGTGCTTT
|
64
|
+
TTTAAAACGAAAAAAAACCACAAATTTCCTTAAACCATGG
|
65
|
+
>1108390 length 1440 cvg_34.1_tip_1
|
66
|
+
GCAACCTTTTGGGTACAGAATGCGAGAAAATCTGGGCAGATCGGTCACAGTATTTTAAGGATTTCTAGGTTAACTTTGAATCAAATAAATTCAAACTCTA
|
67
|
+
GGAGGAAGATTATCACATCATGCTGCTGTCGTGGGATTCATCACAAACCGGTATTTGTTTCATTACTGAAAGACAAACTTAAAGGAAGTCTTAATAGGAG
|
68
|
+
CCTGGGTCCATGTTACTCCACAAATTCACATTTGTTTCCTAAAGAAGTGACTAGTCCGGAGAAACTCACGCAAGATGAAATAACGGCCCTGCACCAAAAT
|
69
|
+
ATTTGTAAGGAATTAATTACAGATGAGAGATCTTTACAAGAGGTTGCACAGTACTATTTTGATGGAAAAGGCAAAGCGTTTAGACCCATGTTGGTTTTAC
|
70
|
+
TCACGGCTGGTGCATGCAATACTCATACTCAGGGTTCTAATAGTAAACTGGTGGATACTCAGAGACGGATTGCAATGATAGCAGAGATGATCCACACAGC
|
71
|
+
GAGTCTCATGCATGATGATGTCATCGATAATGCAGATACAAGGAGGAACAAAACAGCAATCAATGAAATGTGGGGACAGAGAAAGGCAATTTTAGCTGGA
|
72
|
+
GACTTTGTTCTATCAGTTTCATCTCAGGTTTTAGCAAGAATAGGAAACGAGGAAGTTGTTTTAATTCTTTCCCAAGTCATAGAAGATCTTGTTAGAGGGG
|
73
|
+
AATTTATGCAACTTGGTTCTAAGGAGGATGAAAATGAGCGCTTTGCCCACTACCTCAAGAAGACGTTCAAGAAAACAGCAAGTCTCATGGCTTACAGCTG
|
74
|
+
TCAAGCAGTAGCAATTCTGGGTGGTTGCAGTGCTGAGGTGTGTCAGATAGCCTACGAGTACGGCCGGAATACAGGCATGGCATTCCAGCTGATAGATGAT
|
75
|
+
GTATTAGACTTTGTATCCAGTGACGCCGCTATGGGGAAACCGACGGCTGCTGACTTGAAGCTGGGCCTTGCAACCGGGCCAGTGCTGTTTGCTGCCGAGA
|
76
|
+
AGTTTCCAGAGTTGGATGCCATGATCATGAGACGGTTTAGCGAGACTGGTGATGTAATGGCTGCAAGGGAAGCTGTTGCCAAGACTGAAGGCATCGAGCA
|
77
|
+
GACAAGACACATCGCTAACCAGCACAGCTTAGAAGCTCAGAAACAGATCAGTAAACTACATCCCAGTCCAGAGAGGCAAGCACTTATAGAACTTGCACAG
|
78
|
+
CGAGTTGTTACAAGAATAAAGTAAATCCGTAGTTAAAATCCTTGTGTCAGTGCAAGAATGTTGGATCTCTAAAAACAGCCTTTATGCAATACAGGTTGTG
|
79
|
+
AAAAATGCACAATGTTGTCTTTCACCATAACTACCCCTGATGTGCACTAGTGTACACAGAAACCCGGTTGCGTCAAAATGAAATGGTGTGAGACAAGACG
|
80
|
+
TCCTAATGAAATAGAGGAAACACGTTTTGCATAGATTTCT
|
81
|
+
>1108392 length 1440 cvg_27.6_tip_1
|
82
|
+
ATTTGATTTGATTTGAAATGGTTAATTAAAGTAAAAAACTGTTTCACAGCCAGTAGGCTGAATTACATGATTTGTATTAGTTAAAAAATATTCAAATGTA
|
83
|
+
TTATAGAAATTACAATAAAAACAAACAAATAATGCATAGGCAGTGGAACAGAAAAATGAACGTGGGGATCCCCAAGGAAAACAAGATTTTTACAAGAGCA
|
84
|
+
CTTAAAATAAAACAAGGTAAAATAAGGGCACACCGCTGTGAACGACCCAAAACAATGCAATTAAGTAGATTTAATAATAATAAAAAAAAAACAATAAAAA
|
85
|
+
TTTTAATAATACGCGTGAGATAAGGAACTTGACTGTTTTAAACCATTCAGCTCTACAGCGATGTATAGTATGCTTTGTTGAACTCCGAGTTTGGTGAATT
|
86
|
+
CTTACGTTTCTTCGGTGTAGCAAATGACGGAGGCTTGTTAGTGAATTTTGCATGGATTATGCAAAAGAAAGGCACGATTGAAAGCACCTGCTCTCCCAGG
|
87
|
+
TAGGGAGCTGACAAGACAGTGCTTGAGCGCTTGTTCATATTGACTATAGTTGGTATCAAGTATAATCCTGCCTGCTCGCTTCTGTGTCCACTCATGGCTT
|
88
|
+
ATACACTGCTCCTTTGTTAGACCTCAAGATCAGACAACAGTCAGCATACTCCAGGACTAGCCTGACAATCGTCATGTAGTAAGTCAGACGGTCGCCTCGT
|
89
|
+
GGAAAAGTTGTCTAAATTGAGAGAATGAAGACATATTGATGACTTTTGGAAGGTTTAATGATGGACGCAATATGCGCATTCAGTTCCTAGCAATTTAACG
|
90
|
+
GTGTTATGTTGAATGATGACCTCTTTTTTCCCAAAGGAAACGTTCATTGTTGCGCACTTGTTCAGGTTTAGTTGTATTTGTTCCTGTCTGACCAATCATA
|
91
|
+
GGGTAACCAGATAGTATCAGGCAACATTGAATGAGCTGATGCATGGCGAGTCTCTACCACTGTTGGGTTGTTCACGTGTCTGAGAACGTGAAGAGCATTA
|
92
|
+
GCTGCATCACCAGTGGGCAGCATCATGTTGGTGTCTTTACCACATAGAGCATCTTTGATGAGCAACAGAAAAAGTATAGGTCCCAATTTAGTTCCTTGGG
|
93
|
+
GAACACCGGCTGTTGTTGACTGCCACACAGACTTCACACTTTTTGTACTTCACCCATTGTCTGCAATCTGTTAAAAAGTTTGCGAATTATGTTACCAGAC
|
94
|
+
TGACGTCAAGTTTGACCAACTTGGATATTGCGATTTCGTGGTCAATGCCATTAAATGCTTTGATGAAGTCAGTAAGGAGGACCGTCGACTCGTATGACAT
|
95
|
+
GAAAGAAACTCGAGGGGTCTCGCGAAACATTTACAATTTAAAACAGGATCATTAAGAATTAATTAAAGGAAGATCAGCGTATCGACTCTGACAAATACCA
|
96
|
+
GCAGGTTCAGGTTAAGGTTCAGGTTTATTTTCCATACCAT
|
97
|
+
>1108394 length 1440 cvg_66.6_tip_0
|
98
|
+
AGATAACGACGGGAAAAAGTCAAGATAATTTCACGTACACAACTCTTCAGAAGAAGAACGGAAGCAAGAAAGGATAAGATGATGGTGCGATTCGTAGCCT
|
99
|
+
TACTCGGGGCAGTCAGCCTACTGGTATGTCAATCTGCAGGACTTGATGCCGCAGACGTCGAGGAACAAGACGAGTTCAACAAACCCTATGCTCCTGACAG
|
100
|
+
TTCGTATGCGGATTTAAATGCACTTTTGGGCAACAATGTGCCAAGTCTACACAGCGCCTCCAAGCGTCAACAAAGTGACAGGGAGCGTGAGGTTGAAGCA
|
101
|
+
GCCCAGACGCAATTTTACCCGTATGGAAGAAGAACTGATCCCAGGAAAGCGTCTGGTGGATTCACCTTTGGCAAGAGAGGGCAGTATTTTATCCCCATTC
|
102
|
+
CGTACGAGAAACGAGAGATGGATGAGGTGAACCCGTACAGCGTAGCTAAGCGCGACGACGAGCTGACCGGACTAGAGGAGTACCAAGCTAGCAAGAGGTC
|
103
|
+
AGGTCCTTATTCCTTTAACAGCGGGCTGACCTTTGGCAAGAGGGAACCCGAGAAGAGGAACATATTCGGATCTTATGACTTCGGGAAGCGGGCTTACGGC
|
104
|
+
AACAATTTCAGCTTCGGCAAGCGAGGCATGGGAGTGTCCAGTTTTAGCTTTGGCAAACGATCCGGACTTGAGGGTGAACAAATGATGCCGGAAGACAAAC
|
105
|
+
GGGCGTTCGGAGACTTTTCCTTCGGCAAGCGCAATAATGGTCTGTCCAGCTTCACATTCGGCAAGCGAGAGGGTGAACGATAGAACACGAGAGGGCGCCA
|
106
|
+
TACTGTCTACAATGTGATAATTATAGTATCTTAATTATTTCAAAACCATACTTGATAAGAAATAACTGCTTGCGTTTTGAGTTAAAACATCAGTTCCAAG
|
107
|
+
TATACAAACAAATTTTAAATAACATTGTTTAAAGCACGGATTTTGAATTAAAATGAAACGATAACGCAAAAGTGTGTAAATATAAGCATAGTAAGCTATC
|
108
|
+
AACATCTAGAATTATGGAAGATATTTCAAGATACTAAAACTTGGAAGCTTTAGTTAATTTTAGATCTGGTTGAGATTCAAGTCTTCTTTTCTAGATTTTG
|
109
|
+
TTTCCAGCCAGGGCTTTTACTATCCTTAACGTTTGGGGAAATTGCCAGGGTTGTGGTCACCTTGTTAAGGTCCTAAACAACACATTTTTTGTTAGTAAAA
|
110
|
+
AATTACCCAATTAAGACAATTCATGAGTAAGCGACAAAAACTTAATAATTTAGCATATTTGCTTACGTTAGTTACTACAACAATAATTGAAACTTTCTTA
|
111
|
+
AACACGGTTTTTCTAGTATGCTTTCCTGGTCAGCAATTAACTTGTAGATGTTTTTCAAATACTATGAATTCATATCTACGTAGAAATCTTAAAGATGTTT
|
112
|
+
TCTGTGATTAAGTTTAGAACAACCTTAGCCAACAATCTCC
|
data/lib/npsearch.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'bio'
|
2
|
+
require 'english'
|
2
3
|
require 'fileutils'
|
3
4
|
|
4
5
|
require 'npsearch/arg_validator'
|
6
|
+
require 'npsearch/logger'
|
5
7
|
require 'npsearch/output'
|
6
8
|
require 'npsearch/pool'
|
7
9
|
require 'npsearch/scoresequence'
|
@@ -11,21 +13,23 @@ require 'npsearch/signalp'
|
|
11
13
|
# Top level module / namespace.
|
12
14
|
module NpSearch
|
13
15
|
class <<self
|
16
|
+
attr_accessor :logger
|
14
17
|
attr_accessor :opt
|
15
18
|
attr_accessor :sequences
|
16
19
|
attr_reader :sorted_sequences
|
17
20
|
|
18
21
|
def init(opt)
|
19
|
-
@opt
|
22
|
+
@opt = opt
|
23
|
+
ArgumentsValidators.run(opt)
|
20
24
|
@sequences = []
|
21
25
|
@sorted_sequences = nil
|
22
|
-
@pool =
|
23
|
-
|
24
|
-
extract_orf
|
26
|
+
@pool = initialise_thread_pool
|
27
|
+
create_temp_directory
|
28
|
+
extract_orf
|
25
29
|
end
|
26
30
|
|
27
31
|
def run
|
28
|
-
input_file = @opt[:type] == :
|
32
|
+
input_file = @opt[:type] == :genetic ? @opt[:orf] : @opt[:input_file]
|
29
33
|
iterate_input_file(input_file)
|
30
34
|
@sorted_sequences = @sequences.sort_by(&:score).reverse
|
31
35
|
Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
|
@@ -33,16 +37,37 @@ module NpSearch
|
|
33
37
|
remove_temp_dir
|
34
38
|
end
|
35
39
|
|
40
|
+
def logger
|
41
|
+
@logger ||= Logger.new(STDOUT, @opt[:debug])
|
42
|
+
end
|
43
|
+
|
36
44
|
private
|
37
45
|
|
46
|
+
def initialise_thread_pool
|
47
|
+
return if @opt[:num_threads] == 1
|
48
|
+
logger.debug "Creating a thread pool of size #{@opt[:num_threads]}"
|
49
|
+
Pool.new(@opt[:num_threads])
|
50
|
+
end
|
51
|
+
|
52
|
+
def create_temp_directory
|
53
|
+
FileUtils.mkdir_p(@opt[:temp_dir])
|
54
|
+
logger.debug "Successfully creating temp directory at: #{@opt[:temp_dir]}"
|
55
|
+
end
|
56
|
+
|
38
57
|
# Uses getorf from EMBOSS package to extract all ORF
|
39
58
|
def extract_orf(input = @opt[:input_file], minsize = 90)
|
59
|
+
return if @opt[:type] == :protein
|
60
|
+
logger.debug 'Attempting to extract ORF.'
|
40
61
|
@opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
|
41
|
-
|
42
|
-
|
62
|
+
cmd = "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
|
63
|
+
" -minsize #{minsize} >/dev/null 2>&1"
|
64
|
+
logger.debug "Running: #{cmd}"
|
65
|
+
system(cmd)
|
66
|
+
logger.debug("EGexit Code: #{$CHILD_STATUS.exitstatus}")
|
43
67
|
end
|
44
68
|
|
45
69
|
def iterate_input_file(input_file)
|
70
|
+
logger.debug "Iterating the Input File: #{input_file}"
|
46
71
|
Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
|
47
72
|
if @opt[:num_threads] > 1
|
48
73
|
@pool.schedule(entry) { |e| initialise_seqs(e) }
|
@@ -54,10 +79,10 @@ module NpSearch
|
|
54
79
|
end
|
55
80
|
|
56
81
|
def initialise_seqs(entry)
|
82
|
+
logger.debug "-- Analysing: '#{entry.definition}' (#{entry.aaseq.length})"
|
57
83
|
return if entry.aaseq.length > @opt[:max_orf_length]
|
58
84
|
sp = Signalp.analyse_sequence(entry.aaseq.to_s)
|
59
85
|
return if sp[:sp] == 'N'
|
60
|
-
# seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
|
61
86
|
seq = Sequence.new(entry, sp)
|
62
87
|
ScoreSequence.run(seq, @opt)
|
63
88
|
@sequences << seq
|
@@ -65,6 +90,7 @@ module NpSearch
|
|
65
90
|
|
66
91
|
def remove_temp_dir
|
67
92
|
return unless File.directory?(@opt[:temp_dir])
|
93
|
+
logger.debug "Deleting Temporary directory: #{@opt[:temp_dir]}"
|
68
94
|
FileUtils.rm_rf(@opt[:temp_dir])
|
69
95
|
end
|
70
96
|
end
|
data/lib/npsearch/arg_validator.rb
CHANGED
@@ -1,69 +1,86 @@
|
|
1
1
|
require 'bio'
|
2
|
+
require 'forwardable'
|
2
3
|
|
3
4
|
# Top level module / namespace.
|
4
5
|
module NpSearch
|
5
6
|
# A class that validates the command line opts
|
6
7
|
class ArgumentsValidators
|
7
8
|
class << self
|
9
|
+
extend Forwardable
|
10
|
+
def_delegators NpSearch, :logger
|
11
|
+
|
8
12
|
def run(opt)
|
9
13
|
assert_file_present('input fasta file', opt[:input_file])
|
10
14
|
opt[:input_file] = File.expand_path(opt[:input_file])
|
11
15
|
assert_input_file_not_empty(opt[:input_file])
|
12
16
|
assert_input_file_probably_fasta(opt[:input_file])
|
13
|
-
opt[:type]
|
17
|
+
opt[:type] = assert_input_sequence(opt[:input_file])
|
14
18
|
opt[:num_threads] = check_num_threads(opt[:num_threads])
|
15
19
|
assert_binaries('SignalP 4.1 Script', opt[:signalp_path])
|
20
|
+
logger.debug "The validated OPT hash contains: #{opt}"
|
16
21
|
opt
|
17
22
|
end
|
18
23
|
|
19
24
|
private
|
20
25
|
|
21
26
|
def assert_file_present(desc, file, exit_code = 1)
|
27
|
+
logger.debug "Testing if the #{desc} exists: '#{file}'."
|
22
28
|
return if file && File.exist?(File.expand_path(file))
|
23
|
-
|
29
|
+
error_msg = "*** Error: Couldn't find the #{desc}: '#{file}'."
|
30
|
+
logger.fatal error_msg
|
31
|
+
$stderr.puts error_msg
|
24
32
|
exit exit_code
|
25
33
|
end
|
26
34
|
|
27
35
|
def assert_input_file_not_empty(file)
|
36
|
+
logger.debug "Testing if the input file ('#{file}') is empty."
|
28
37
|
return unless File.zero?(File.expand_path(file))
|
29
|
-
|
30
|
-
|
38
|
+
error_msg = "*** Error: The input_file ('#{file}') seems to be empty."
|
39
|
+
logger.fatal error_msg
|
40
|
+
$stderr.puts error_msg
|
31
41
|
exit 1
|
32
42
|
end
|
33
43
|
|
34
44
|
def assert_input_file_probably_fasta(file)
|
45
|
+
logger.debug("Testing whether the input, ('#{file}') is a fasta file.")
|
35
46
|
File.open(file, 'r') do |f|
|
36
|
-
fasta =
|
47
|
+
fasta = f.readline[0] == '>' ? true : false
|
37
48
|
return fasta if fasta
|
38
49
|
end
|
39
|
-
|
40
|
-
'
|
50
|
+
error_msg = "*** Error: The input file (#{file}) does not seems to be" \
|
51
|
+
' to be a fasta file.'
|
52
|
+
logger.fatal error_msg
|
53
|
+
$stderr.puts error_msg
|
41
54
|
exit 1
|
42
55
|
end
|
43
56
|
|
44
57
|
def assert_input_sequence(file)
|
45
58
|
type = type_of_sequences(file)
|
46
59
|
return type unless type.nil?
|
47
|
-
|
48
|
-
|
49
|
-
|
60
|
+
error_msg = '*** Error: The input files seems to contain a mixture of' \
|
61
|
+
' both protein and nucleotide data.' \
|
62
|
+
' Please correct this and try again.'
|
63
|
+
logger.fatal error_msg
|
64
|
+
$stderr.puts error_msg
|
50
65
|
exit 1
|
51
66
|
end
|
52
67
|
|
53
68
|
# determine file sequence type based on first 500 lines
|
54
69
|
def type_of_sequences(file)
|
70
|
+
logger.debug 'Checking the type of sequence in the input file based' \
|
71
|
+
' on the first 500 lines.'
|
55
72
|
fasta_content = File.foreach(file).first(500).join("\n")
|
56
73
|
# the first sequence does not need to have a fasta definition line
|
57
74
|
sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
|
58
75
|
# get all sequence types
|
59
76
|
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
60
77
|
.uniq.compact
|
78
|
+
logger.debug " The guessed typed of Sequences are: #{sequence_types}"
|
61
79
|
return nil if sequence_types.empty?
|
62
80
|
sequence_types.first if sequence_types.length == 1
|
63
81
|
end
|
64
82
|
|
65
83
|
def guess_sequence_type(seq)
|
66
|
-
# removing non-letter and ambiguous characters
|
67
84
|
cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
|
68
85
|
return nil if cleaned_sequence.length < 10 # conservative
|
69
86
|
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
@@ -71,20 +88,27 @@ module NpSearch
|
|
71
88
|
end
|
72
89
|
|
73
90
|
def check_num_threads(num_threads)
|
91
|
+
logger.debug "Checking the number of threads: #{num_threads}"
|
74
92
|
num_threads = Integer(num_threads)
|
75
93
|
unless num_threads > 0
|
76
|
-
|
77
|
-
|
94
|
+
warn_msg = 'Number of threads can not be lower than 0. Changing' \
|
95
|
+
' number of threads to 1'
|
96
|
+
logger.warn warn_msg
|
97
|
+
$stderr.puts warn_msg
|
78
98
|
num_threads = 1
|
79
99
|
end
|
80
100
|
return num_threads unless num_threads > 256
|
81
|
-
|
82
|
-
|
101
|
+
warn_msg = "Number of threads set at #{num_threads} is unusually high."
|
102
|
+
logger.warn warn_msg
|
103
|
+
$stderr.puts warn_msg
|
83
104
|
end
|
84
105
|
|
85
106
|
def assert_binaries(desc, bin)
|
107
|
+
logger.debug "Checking #{desc} binary at: #{bin}."
|
86
108
|
return if command?(bin.to_s)
|
87
|
-
|
109
|
+
warn_msg = "NpSearch is unable to use the #{desc} at #{bin}"
|
110
|
+
logger.warn warn_msg
|
111
|
+
$stderr.puts warn_msg
|
88
112
|
end
|
89
113
|
|
90
114
|
# Return `true` if the given command exists and is executable.
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -8,15 +8,15 @@ module NpSearch
|
|
8
8
|
class Signalp
|
9
9
|
class << self
|
10
10
|
extend Forwardable
|
11
|
-
def_delegators NpSearch, :opt
|
11
|
+
def_delegators NpSearch, :opt, :logger
|
12
12
|
|
13
13
|
def analyse_sequence(seq)
|
14
14
|
sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
|
15
15
|
sp dmaxcut networks orf)
|
16
16
|
seqs = setup_analysis(seq)
|
17
17
|
sp_results = []
|
18
|
-
seqs.each do |
|
19
|
-
sp_results << run_signalp(
|
18
|
+
seqs.each do |sequence|
|
19
|
+
sp_results << run_signalp(sequence, sp_headers)
|
20
20
|
end
|
21
21
|
sp_results.sort_by { |h| h[:d] }.reverse[0]
|
22
22
|
end
|
@@ -24,10 +24,10 @@ module NpSearch
|
|
24
24
|
private
|
25
25
|
|
26
26
|
def run_signalp(seq, sp_headers)
|
27
|
-
|
27
|
+
timeout(300) do
|
28
28
|
cmd = "echo '>seq\n#{seq}\n' | #{opt[:signalp_path]} -t euk" \
|
29
|
-
|
30
|
-
stdin, stdout, stderr
|
29
|
+
' -f short -U 0.34 -u 0.34'
|
30
|
+
stdin, stdout, stderr = Open3.popen3(cmd)
|
31
31
|
out = stdout.gets(nil).split("\n").delete_if { |l| l[0] == '#' }
|
32
32
|
if out.nil? || out.empty?
|
33
33
|
print stdout
|
@@ -40,13 +40,13 @@ module NpSearch
|
|
40
40
|
stdin.close; stdout.close; stderr.close
|
41
41
|
end
|
42
42
|
rescue Timeout::Error
|
43
|
-
no_results = [0,0,1,1,1,1,1,1,1,'N',1,1, seq]
|
43
|
+
no_results = [0, 0, 1, 1, 1, 1, 1, 1, 1, 'N', 1, 1, seq]
|
44
44
|
return Hash[sp_headers.map(&:to_sym).zip(no_results)]
|
45
45
|
end
|
46
46
|
|
47
47
|
def setup_analysis(seq)
|
48
48
|
orfs = seq.scan(/(?=(M\w{#{opt[:min_orf_length]},}))./).flatten
|
49
|
-
|
49
|
+
opt[:type] == :protein || orfs.empty? || orfs.nil? ? [seq] : orfs
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/npsearch/version.rb
CHANGED
data/test/test_argument_validator.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require_relative 'test_helper'
|
2
|
-
|
2
|
+
require 'npsearch'
|
3
3
|
require 'npsearch/arg_validator'
|
4
4
|
|
5
5
|
# Class to test the how well the CLI arguments are validated.
|
6
6
|
class TestInputArgumentValidator < Minitest::Test
|
7
7
|
def setup
|
8
|
+
NpSearch.logger = Logger.new(STDOUT, true)
|
8
9
|
@c = NpSearch::ArgumentsValidators
|
9
|
-
@opt = { num_threads: 1, min_orf_length: 30 }
|
10
|
+
@opt = { num_threads: 1, min_orf_length: 30, debug: true }
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_assert_file_present
|
data/test/test_sequence.rb
CHANGED
@@ -17,7 +17,7 @@ class TestSequences < Minitest::Test
|
|
17
17
|
sp = { name: 'test_sequences', cmax: '0.492', cmax_pos: '31', ymax: '0.612',
|
18
18
|
ymax_pos: '31', smax: '0.950', smax_pos: '17', smean: '0.786',
|
19
19
|
d: '0.706', sp: 'Y', dmaxcut: '0.300', networks: 'SignalP-noTM',
|
20
|
-
orf: sequence.seq}
|
20
|
+
orf: sequence.seq }
|
21
21
|
@seq = NpSearch::Sequence.new(sequence, sp, 2)
|
22
22
|
end
|
23
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: npsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Moghul et al.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,8 +115,10 @@ files:
|
|
115
115
|
- bin/npsearch
|
116
116
|
- exemplar_data/README.md
|
117
117
|
- exemplar_data/genetic_data.fa
|
118
|
+
- exemplar_data/protein.fa
|
118
119
|
- lib/npsearch.rb
|
119
120
|
- lib/npsearch/arg_validator.rb
|
121
|
+
- lib/npsearch/logger.rb
|
120
122
|
- lib/npsearch/output.rb
|
121
123
|
- lib/npsearch/pool.rb
|
122
124
|
- lib/npsearch/scoresequence.rb
|