npsearch 0.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/npsearch +30 -121
- data/lib/npsearch.rb +18 -18
- data/lib/npsearch/output.rb +29 -0
- data/lib/npsearch/scoresequence.rb +109 -0
- data/lib/npsearch/sequence.rb +28 -16
- data/lib/npsearch/signalp.rb +2 -21
- data/lib/npsearch/version.rb +1 -1
- data/npsearch.gemspec +1 -1
- data/templates/contents.slim +54 -0
- metadata +8 -6
- data/lib/npsearch/logger.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97d1818dc3219f272bd1acb116dbed684e6b0dbe
|
4
|
+
data.tar.gz: dc03e0976e737f28ab93f6899d21278601dd6c4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a95520149d8ced28c465cfafa55eaefd3cec1c3cd0cc94796931b8017f940ee8df681ea254bbc063464e9ed67d713bca34da85ff8ffed84e0c5c576cd93ec85
|
7
|
+
data.tar.gz: 88da9e2e6a2bcbba268ecfbf3f7bc1ce4351d33430826bd6571b8ecdb74f087308cb4ed86f69ff8f80f3899420a252cb822a4d656b3c025b736a3ca0f9ff39c0
|
data/bin/npsearch
CHANGED
@@ -9,58 +9,48 @@ opt = {}
|
|
9
9
|
optparse = OptionParser.new do |opts|
|
10
10
|
opts.banner = <<Banner
|
11
11
|
|
12
|
-
* Usage: npsearch [Options] -i [Input File]
|
12
|
+
* Usage: npsearch [Options] -i [Input File]
|
13
13
|
|
14
14
|
* Mandatory Options:
|
15
15
|
|
16
16
|
Banner
|
17
17
|
|
18
18
|
opt[:input_file] = nil
|
19
|
-
opts.on('-i', '--input [file]',
|
19
|
+
opts.on('-i', '--input [file]',
|
20
|
+
'Path to the input fasta file') do |f|
|
20
21
|
opt[:input_file] = f
|
21
22
|
end
|
22
23
|
|
23
24
|
opts.separator ''
|
24
25
|
opts.separator '* Optional Options:'
|
25
26
|
|
26
|
-
opt[:
|
27
|
-
opts.on('-
|
28
|
-
'
|
29
|
-
'
|
30
|
-
'
|
31
|
-
|
32
|
-
' between each query and putting the motif query in speech marks',
|
33
|
-
' e.g. "KR|RR|R..R"',
|
34
|
-
' Advanced Users: Regular expressions are supported.') do |motif|
|
35
|
-
opt[:motif] = motif
|
27
|
+
opt[:signalp_path] = File.join(ENV['HOME'], 'signalp/signalp')
|
28
|
+
opts.on('-s', '--signalp_path', String,
|
29
|
+
'The full path to the signalp script. This can be downloaded from',
|
30
|
+
' CBS. See https://www.github.com/wurmlab/NpSearch for more',
|
31
|
+
' information') do |p|
|
32
|
+
opt[:signalp_path] = p
|
36
33
|
end
|
37
34
|
|
38
|
-
opt[:
|
39
|
-
opts.on('-
|
40
|
-
'
|
41
|
-
'
|
42
|
-
|
35
|
+
opt[:usearch_path] = File.join(ENV['HOME'], 'bin/uclust')
|
36
|
+
opts.on('-u', '--usearch_path', String,
|
37
|
+
'The full path to the usearch binary. This script can be downloaded',
|
38
|
+
' from .... See https://www.github.com/wurmlab/NpSearch for more',
|
39
|
+
' information') do |p|
|
40
|
+
opt[:usearch_path] = p
|
43
41
|
end
|
44
42
|
|
45
|
-
opt[:
|
46
|
-
opts.on('-
|
47
|
-
'
|
48
|
-
|
49
|
-
" (Version 4.x), downloadable from CBS. If this argument isn't ",
|
50
|
-
' suplied, then NpSearch will try to run a local version of the',
|
51
|
-
' Signal P script.') do |signalp_file|
|
52
|
-
opt[:signalp_file] = signalp_file
|
43
|
+
opt[:num_threads] = 1
|
44
|
+
opts.on('-n', '--num_threads', Integer,
|
45
|
+
'The number of threads to use when analysing the input file') do |n|
|
46
|
+
opt[:num_threads] = n
|
53
47
|
end
|
54
48
|
|
55
|
-
opt[:
|
56
|
-
opts.on('-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
opt[:verbose] = false
|
61
|
-
opts.on('-v', '--verbose', 'Provides more information on each step taken',
|
62
|
-
' in this program.') do
|
63
|
-
opt[:verbose] = true
|
49
|
+
opt[:orf_min_length] = 10
|
50
|
+
opts.on('-m', '--orf_min_length N', Integer,
|
51
|
+
'The minimum length of a potential neuropeptide precursor.',
|
52
|
+
' Default: 30') do |n|
|
53
|
+
opt[:orf_min_length] = n
|
64
54
|
end
|
65
55
|
|
66
56
|
opts.on('-h', '--help', 'Display this screen') do
|
@@ -68,98 +58,17 @@ Banner
|
|
68
58
|
exit
|
69
59
|
end
|
70
60
|
|
71
|
-
opts.on('--version', 'Shows version') do
|
61
|
+
opts.on('-v', '--version', 'Shows version') do
|
72
62
|
puts NpSearch::VERSION
|
73
63
|
exit
|
74
64
|
end
|
75
65
|
end
|
76
66
|
optparse.parse!
|
77
67
|
|
68
|
+
# Temporary hard coding my defaults...
|
69
|
+
opt[:num_threads] = 8
|
70
|
+
opt[:signalp_path] = '/Volumes/Data/data/programs/signalp-4.1/signalp'
|
71
|
+
opt[:usearch_path] = '/Volumes/Data/data/programs/bin/usearch'
|
72
|
+
|
78
73
|
NpSearch.init(opt)
|
79
74
|
NpSearch.run
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
# ############# Argument Validation...##############
|
85
|
-
# arg_vldr = NpSearch::ArgValidators.new(opt[:verbose])
|
86
|
-
# input_type = arg_vldr.arg(opt[:motif], opt[:input], opt[:output_dir],
|
87
|
-
# opt[:cut_off], opt[:extract_orf], opt[:signalp_file],
|
88
|
-
# optparse.help)
|
89
|
-
|
90
|
-
# ############# General Validation...##############
|
91
|
-
# vldr = NpSearch::Validators.new
|
92
|
-
# vldr.output_dir(opt[:output_dir])
|
93
|
-
# if opt[:signalp_file].nil? && opt[:extract_orf] == false
|
94
|
-
# sp_dir = vldr.signalp_dir
|
95
|
-
# end
|
96
|
-
|
97
|
-
# ############# Converting input file to Bio::FastaFormat. #############
|
98
|
-
# input_read = NpSearch::Input.read(opt[:input], input_type)
|
99
|
-
|
100
|
-
# ############# Extract_ORF #############
|
101
|
-
# if input_type == 'genetic'
|
102
|
-
# # Translate Sequences in all 6 frames
|
103
|
-
# translated = NpSearch::Translation.translate(input_read)
|
104
|
-
# translated.to_fasta('translated seq.', "#{opt[:output_dir]}/1_protein.fa")
|
105
|
-
# # Extract all possible ORF that are longer than the ORF_min_length
|
106
|
-
# orf = NpSearch::Translation.extract_orf(translated, opt[:cut_off])
|
107
|
-
# orf.to_fasta('Open Reading Frames', "#{opt[:output_dir]}/2_orf.fa")
|
108
|
-
|
109
|
-
# if opt[:extract_orf]
|
110
|
-
# puts "\nSuccess: All output files created in the directory:" \
|
111
|
-
# "#{opt[:output_dir]}'.\n "
|
112
|
-
# exit
|
113
|
-
# end
|
114
|
-
# end
|
115
|
-
|
116
|
-
# ############# Setting up more variables...##############
|
117
|
-
# if opt[:motif] == 'neuro_clv'
|
118
|
-
# motif = 'KK|KR|RR|' \
|
119
|
-
# 'R..R|R....R|R......R|H..R|H....R|H......R|K..R|K....R|K......R'
|
120
|
-
# else
|
121
|
-
# motif = opt[:motif]
|
122
|
-
# end
|
123
|
-
# vldr.motif_type(motif)
|
124
|
-
|
125
|
-
# if input_type == 'genetic'
|
126
|
-
# sp_input_file = "#{opt[:output_dir]}/2_orf.fa"
|
127
|
-
# sp_hash = orf
|
128
|
-
# file_number = 3
|
129
|
-
# else # i.e. if the input is protein
|
130
|
-
# sp_input_file = opt[:input]
|
131
|
-
# sp_hash = input_read
|
132
|
-
# file_number = 1
|
133
|
-
# end
|
134
|
-
|
135
|
-
# if opt[:signalp_file].nil?
|
136
|
-
# sp_out_file = "#{opt[:output_dir]}/#{file_number}_signalp_out.txt"
|
137
|
-
# file_number += 1
|
138
|
-
# NpSearch::Signalp.signalp(sp_dir, sp_input_file, sp_out_file)
|
139
|
-
# else
|
140
|
-
# sp_out_file = opt[:signalp_file]
|
141
|
-
# file_number = 1
|
142
|
-
# end
|
143
|
-
|
144
|
-
# ############# Signal P Results file Validation #############
|
145
|
-
# vldr.sp_results(sp_out_file)
|
146
|
-
|
147
|
-
# ############# Extract sequences with a signal peptide #############
|
148
|
-
# secretome = NpSearch::Analysis.parse(sp_out_file, sp_hash, motif)
|
149
|
-
# secretome.to_fasta('secretome file',
|
150
|
-
# "#{opt[:output_dir]}/#{file_number}_secretome.fa")
|
151
|
-
# file_number += 1
|
152
|
-
|
153
|
-
# ############# Remove any duplicate data #############
|
154
|
-
# flattened_seq = NpSearch::Analysis.flattener(secretome)
|
155
|
-
|
156
|
-
# ############# Creating Output Files #############
|
157
|
-
# flattened_seq.to_fasta('fasta output file',
|
158
|
-
# "#{opt[:output_dir]}/#{file_number}_output.fa")
|
159
|
-
# flattened_seq.to_html(motif,
|
160
|
-
# "#{opt[:output_dir]}/#{file_number}_output.html")
|
161
|
-
|
162
|
-
# ############# Success #############
|
163
|
-
# puts # a blank line.
|
164
|
-
# puts "Success: All output files created in the directory:'#{opt[:output_dir]}'."
|
165
|
-
# puts # a blank line
|
data/lib/npsearch.rb
CHANGED
@@ -2,38 +2,35 @@ require 'bio'
|
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
4
|
# require 'npsearch/arg_validator'
|
5
|
-
require 'npsearch/
|
5
|
+
require 'npsearch/output'
|
6
|
+
require 'npsearch/pool'
|
7
|
+
require 'npsearch/scoresequence'
|
6
8
|
require 'npsearch/sequence'
|
7
9
|
require 'npsearch/signalp'
|
8
|
-
require 'npsearch/pool'
|
9
10
|
|
10
11
|
# Top level module / namespace.
|
11
12
|
module NpSearch
|
12
13
|
class <<self
|
13
|
-
MIN_ORF_SIZE =
|
14
|
+
MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
|
14
15
|
|
15
16
|
attr_accessor :opt
|
16
17
|
attr_accessor :sequences
|
17
|
-
|
18
|
-
def logger
|
19
|
-
@logger ||= Logger.new(STDERR, @opt[:verbose])
|
20
|
-
end
|
18
|
+
attr_reader :sorted_sequences
|
21
19
|
|
22
20
|
def init(opt)
|
23
21
|
# @opt = args_validation(opt)
|
24
22
|
@opt = opt
|
25
23
|
@sequences = []
|
26
|
-
@
|
24
|
+
@sorted_sequences = nil
|
27
25
|
@opt[:type] = guess_sequence_type
|
28
|
-
@opt[:signalp_path] = '/Volumes/Data/programs/signalp-4.1/signalp'
|
29
26
|
@pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
|
30
27
|
end
|
31
28
|
|
32
29
|
def run
|
33
30
|
iterate_input_file
|
34
|
-
|
35
|
-
|
36
|
-
@
|
31
|
+
@sorted_sequences = @sequences.sort_by(&:score).reverse
|
32
|
+
Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
|
33
|
+
Output.to_html(@opt[:input_file])
|
37
34
|
end
|
38
35
|
|
39
36
|
private
|
@@ -60,7 +57,10 @@ module NpSearch
|
|
60
57
|
|
61
58
|
def initialise_protein_seq(id, seq)
|
62
59
|
sp = Signalp.analyse_sequence(seq)
|
63
|
-
|
60
|
+
return unless sp[:sp] == 'Y'
|
61
|
+
seq = Sequence.new(id, seq, sp)
|
62
|
+
ScoreSequence.run(seq)
|
63
|
+
@sequences << seq
|
64
64
|
end
|
65
65
|
|
66
66
|
def initialise_transcriptomic_seq(id, naseq)
|
@@ -72,15 +72,15 @@ module NpSearch
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def initialise_orfs(id, orfs, frame)
|
75
|
-
idx = 0
|
76
75
|
orfs.each do |orf|
|
77
76
|
sp = Signalp.analyse_sequence(orf)
|
78
77
|
next if sp[:sp] == 'N'
|
79
|
-
seq
|
80
|
-
seq
|
81
|
-
seq.orf_index = idx
|
78
|
+
seq = Sequence.new(id, orf, sp, frame)
|
79
|
+
ScoreSequence.run(seq)
|
82
80
|
@sequences << seq
|
83
|
-
|
81
|
+
# The remaining ORF in this frame are simply shorter versions of the
|
82
|
+
# same orf so break loop once signal peptide is found.
|
83
|
+
break if sp[:sp] == 'Y'
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'slim'
|
2
|
+
|
3
|
+
module NpSearch
  # Writes the NpSearch results to disk next to the input file: an HTML
  # report rendered from templates/contents.slim and a FASTA file.
  class Output
    class << self
      # Renders the Slim template and writes it to "<input_file>.out.html".
      #
      # The template is rendered with the NpSearch module as its scope, so
      # the instance variables it refers to are looked up on NpSearch itself.
      #
      # input_file - path of the analysed input file; used as output prefix.
      def to_html(input_file)
        # Three levels up from lib/npsearch/output.rb is the gem root.
        template_path = File.expand_path('../../../templates/contents.slim',
                                         __FILE__)
        template_src  = File.read(template_path)
        html_content  = Slim::Template.new { template_src }.render(NpSearch)
        File.open("#{input_file}.out.html", 'w') { |f| f.puts html_content }
      end

      # Writes the sequences to "<input_file>.out.fa", in the order given.
      #
      # Each record is a header line followed by the signal peptide and the
      # remaining sequence joined on one line. Nucleotide input additionally
      # gets the translation frame in the header. Sequences are skipped when
      # input_type is neither :protein nor :nucleotide.
      #
      # input_file       - path of the analysed input file (output prefix).
      # sorted_sequences - objects responding to id, signalp, seq and
      #                    translated_frame.
      # input_type       - :protein or :nucleotide.
      def to_fasta(input_file, sorted_sequences, input_type)
        File.open("#{input_file}.out.fa", 'w') do |f|
          sorted_sequences.each do |s|
            header = fasta_header(s, input_type)
            next if header.nil?
            f.puts header
            f.puts "#{s.signalp}#{s.seq}"
          end
        end
      end

      private

      # Returns the FASTA header for a sequence, or nil for an input type
      # this class does not know how to label.
      def fasta_header(sequence, input_type)
        case input_type
        when :protein
          ">#{sequence.id}"
        when :nucleotide
          ">#{sequence.id}-(frame:#{sequence.translated_frame})"
        end
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module NpSearch
  # Scores a sequence on how closely it resembles a neuropeptide precursor.
  #
  # The sequence is split at potential cleavage sites and its score is
  # raised for each of: cleavage sites found, C-terminal glycines, similar
  # peptides (clustered with the external `usearch` program) and acidic
  # spacer peptides.
  class ScoreSequence
    class << self
      DI_CLV = 'KR|RR|KK'.freeze
      MONO_NP_CLV_2 = '[KR]..R'.freeze
      MONO_NP_CLV_4 = '[KR]....R'.freeze
      MONO_NP_CLV_6 = '[KR]......R'.freeze
      NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
               "(#{MONO_NP_CLV_6})".freeze

      # Keys for the nine capture groups of the splitting regex, in order:
      # the four cleavage-site groups before the peptide, the peptide itself,
      # and the four cleavage-site groups after it.
      NP_FIELDS = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
                     di_clv_end mono_2_clv_end mono_4_clv_end
                     mono_6_clv_end).map(&:to_sym).freeze

      # Scores the given sequence in place.
      #
      # sequence - object responding to seq, score, score= and
      #            potential_cleaved_nps / potential_cleaved_nps=.
      #
      # NOTE(review): the sequence is kept in a class-level instance variable
      # while it is being scored, so concurrent calls would share state -
      # confirm callers never invoke this from several threads at once.
      def run(sequence)
        @sequence = sequence
        split_into_neuropeptides
        count_np_cleavage_sites
        count_c_terminal_glycines
        np_similarity
        acidic_spacers
      end

      private

      # Splits the sequence at cleavage sites and stores one hash per
      # potential peptide (keys: NP_FIELDS) on the sequence.
      def split_into_neuropeptides
        matches = @sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
        @sequence.potential_cleaved_nps = matches.map do |captures|
          Hash[NP_FIELDS.zip(captures)]
        end
      end

      # Adds the cleavage-site score for the site that ends each peptide.
      def count_np_cleavage_sites
        @sequence.potential_cleaved_nps.each do |e|
          count_dibasic_np_clv(e[:di_clv_end])
          count_mono_basic_np_clv(e[:mono_2_clv_end], e[:mono_4_clv_end],
                                  e[:mono_6_clv_end])
        end
      end

      # KR scores higher (0.09) than RR or KK (0.05); anything else is free.
      def count_dibasic_np_clv(dibasic_clv)
        case dibasic_clv
        when 'KR'
          @sequence.score += 0.09
        when 'RR', 'KK'
          @sequence.score += 0.05
        end
      end

      # Adds 0.02 when any of the mono-basic cleavage groups matched.
      def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
        return if mono_2_clv.nil? && mono_4_clv.nil? && mono_6_clv.nil?
        @sequence.score += 0.02
      end

      # Counts the number of C-terminal glycines: 0.25 for a peptide ending
      # in G that is followed by a KR site, otherwise 0.10 for a peptide
      # ending in G, GK or GR.
      def count_c_terminal_glycines
        @sequence.potential_cleaved_nps.each do |e|
          if e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
            @sequence.score += 0.25
          elsif e[:np] =~ /G$|GK$|GR$/
            @sequence.score += 0.10
          end
        end
      end

      # Adds 0.10 for every peptide in which more than half of the residues
      # are acidic (D or E).
      def acidic_spacers
        @sequence.potential_cleaved_nps.each do |e|
          # fdiv keeps the fraction; Integer#/ would truncate it to 0 or 1.
          acidic_fraction = e[:np].count('DE').fdiv(e[:np].length)
          @sequence.score += 0.10 if acidic_fraction > 0.5
        end
      end

      # Adds 0.15 per member for every cluster of more than one peptide.
      #
      # Only "C" (cluster) records of the UC output are read; per the USEARCH
      # UC format their third tab-separated field is the cluster size (the
      # fourth is percent identity, which is not a count).
      def np_similarity
        run_uclust.each_line do |line|
          next unless line.start_with?('C')
          no_of_seq_in_cluster = line.split(/\t/)[2].to_i
          next unless no_of_seq_in_cluster > 1
          @sequence.score += (0.15 * no_of_seq_in_cluster)
        end
      end

      # Clusters the potential peptides with `usearch` (50% identity) and
      # returns the raw UC output as a String. Output of the command itself
      # is discarded, so the result is empty when the command does not run.
      def run_uclust
        f = Tempfile.new('uclust')
        fo = Tempfile.new('uclust_out')
        write_sequence_content_to_tempfile(f)
        `usearch -cluster_fast #{f.path} -id 0.5 -uc #{fo.path} >/dev/null 2>&1`
        IO.read(fo.path)
      ensure
        # Either tempfile may be nil if its creation raised.
        f.close! if f
        fo.close! if fo
      end

      # Writes the potential peptides as FASTA records (seq0, seq1, ...) to
      # the tempfile and closes it so the external program can read it.
      def write_sequence_content_to_tempfile(tempfile)
        records = @sequence.potential_cleaved_nps.each_with_index.map do |e, i|
          ">seq#{i}\n#{e[:np]}\n"
        end
        tempfile.write(records.join)
        tempfile.close
      end
    end
  end
end
|
data/lib/npsearch/sequence.rb
CHANGED
@@ -1,25 +1,37 @@
|
|
1
|
-
require 'forwardable'
|
2
|
-
|
3
1
|
module NpSearch
  # A class to hold sequence data: the signal peptide, the remaining
  # sequence, an HTML-highlighted rendering and the score.
  class Sequence
    DI_NP_CLV = 'KR|KK|RR'
    MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'

    attr_reader :id
    attr_reader :signalp
    attr_reader :seq
    attr_reader :html_seq
    attr_reader :signalp_output
    attr_reader :translated_frame
    attr_accessor :score
    attr_accessor :potential_cleaved_nps

    # id             - identifier of the sequence.
    # seq            - full sequence, signal peptide included.
    # signalp_output - Hash of the signalp result; :ymax_pos is read as the
    #                  1-based position of the first residue after the
    #                  signal peptide.
    # frame          - translation frame, nil when not applicable.
    def initialize(id, seq, signalp_output, frame = nil)
      @id = id
      @signalp_output = signalp_output
      # Clamp at 0 so a missing or zero position yields an empty signal
      # peptide instead of a slice counted from the end of the string.
      sp_cleavage_site_idx = [signalp_output[:ymax_pos].to_i - 1, 0].max
      @signalp = seq[0, sp_cleavage_site_idx]
      # Slicing past the end returns nil; keep @seq a String regardless.
      @seq = seq[sp_cleavage_site_idx..-1] || ''
      @html_seq = format_seq_for_html
      @translated_frame = frame
      @score = 0
      @potential_cleaved_nps = nil
    end

    # Returns the sequence as an HTML fragment with span markup around
    # cysteines, dibasic cleavage sites, the final R of mono-basic cleavage
    # sites and a glycine directly preceding a dibasic site. The signal
    # peptide and the remaining sequence each get their own wrapping span.
    #
    # NOTE(review): the mono-basic pattern is applied after span markup has
    # been inserted, so its `.` wildcards can match markup characters -
    # confirm this is the intended highlighting.
    def format_seq_for_html
      seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
      seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
      # Tag each mono-basic match so that only its final R gets wrapped.
      seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::')
      seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
      seq.gsub!('G<span class=np_clv>',
                '<span class=glycine>G</span><span class=np_clv>')
      "<span class=signalp>#{@signalp}</span><span class=seq>#{seq}</span>"
    end
  end
end
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'csv'
|
2
1
|
require 'forwardable'
|
3
2
|
require 'tempfile'
|
4
3
|
|
@@ -7,28 +6,10 @@ module NpSearch
|
|
7
6
|
class Signalp
|
8
7
|
class << self
|
9
8
|
extend Forwardable
|
10
|
-
def_delegators NpSearch, :opt
|
11
|
-
|
12
|
-
def run
|
13
|
-
end
|
14
|
-
|
15
|
-
def analyse_file(file)
|
16
|
-
sp_out = []
|
17
|
-
sp_headers = %w(name cmax cmax-pos ymax ymax-pos smax smax-pos smean d
|
18
|
-
sp dmaxcut networks)
|
19
|
-
output = `#{opt[:signalp_path]} -t euk -f short -U 0.34 -u 0.34 \
|
20
|
-
#{file}`
|
21
|
-
lines = CSV.parse(output.gsub(/ +/, ','), col_sep: ',',
|
22
|
-
skip_lines: /^#/,
|
23
|
-
header_converters: :symbol,
|
24
|
-
converters: :all,
|
25
|
-
headers: sp_headers)
|
26
|
-
lines.each { |line| sp_out << line.to_hash }
|
27
|
-
sp_out
|
28
|
-
end
|
9
|
+
def_delegators NpSearch, :opt
|
29
10
|
|
30
11
|
def analyse_sequence(seq)
|
31
|
-
sp_headers = %w(name cmax
|
12
|
+
sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
|
32
13
|
sp dmaxcut networks)
|
33
14
|
f = Tempfile.new('signalp')
|
34
15
|
f.write(">seq\n#{seq}")
|
data/lib/npsearch/version.rb
CHANGED
data/npsearch.gemspec
CHANGED
@@ -0,0 +1,54 @@
|
|
1
|
+
doctype html
|
2
|
+
html lang="en"
|
3
|
+
head
|
4
|
+
meta charset="utf-8"
|
5
|
+
meta content="IE=edge" http-equiv="X-UA-Compatible"
|
6
|
+
meta content="width=device-width, initial-scale=1" name="viewport"
|
7
|
+
meta content="NpSearch | Identify Novel Neuropeptides" name="description"
|
8
|
+
meta content="Wurmlab" name="author"
|
9
|
+
title NpSearch | Identify Novel Neuropeptides
|
10
|
+
css:
|
11
|
+
html { position: relative; min-height: 100%; }
|
12
|
+
body {margin:0 0 100px; background-color:#F5F5F5; font-family:"Helvetica Neue", Helvetica, Arial, sans-serif; font-size:15px; line-height:1.42857143; color:#2c3e50;}
|
13
|
+
.container {margin-right:auto; margin-left:auto; padding-left:15px; padding-right:15px; max-width:1170px; width:95%;}
|
14
|
+
footer {bottom:0; width:100%; margin:0 auto; position:absolute; height:100px; overflow:hidden; border-top:2px solid #DBDBDB; color:#b4bcc2;text-align:center;}
|
15
|
+
a{color:#18bc9c; text-decoration:none}a:hover,a:focus{color:#18bc9c; text-decoration:underline}
|
16
|
+
p {margin:0 0 10.5px}
|
17
|
+
.id {font-weight:bold;}
|
18
|
+
.sequence {word-break:break-all; font-family:Courier New, Courier, Mono;}
|
19
|
+
.signalp {color:#007AC0; font-weight:bold;}
|
20
|
+
.np_clv {color:#00B050; font-weight:bold;}
|
21
|
+
.mono_np_clv {color:#00B050;}
|
22
|
+
.glycine {color:#FF0000; font-weight:bold;}
|
23
|
+
.cysteine {text-decoration: underline; font-weight:bold;}
|
24
|
+
body
|
25
|
+
.container
|
26
|
+
h1 NpSearch Results
|
27
|
+
.results
|
28
|
+
- @sorted_sequences.each do |seq|
|
29
|
+
p.sequence
|
30
|
+
span.id
|
31
|
+
- if @opt[:type] == :protein
|
32
|
+
| >#{seq.id}
|
33
|
+
- elsif @opt[:type] == :nucleotide
|
34
|
+
| >#{seq.id}-(frame:#{seq.translated_frame})
|
35
|
+
br
|
36
|
+
span.seq== seq.html_seq
|
37
|
+
br
|
38
|
+
br
|
39
|
+
br
|
40
|
+
footer
|
41
|
+
p
|
42
|
+
| Please cite "Moghul I, Rowe M, Priyam A, Elphick M & Wurm Y
|
43
|
+
em
|
44
|
+
| (in prep)
|
45
|
+
| NpSearch: A tool to identify novel neuropeptides"
|
46
|
+
br
|
47
|
+
| Developed at
|
48
|
+
a href="https://wurmlab.github.io" target="_blank" Wurm Lab
|
49
|
+
| ,
|
50
|
+
a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
|
51
|
+
br
|
52
|
+
| This page was created by
|
53
|
+
a href="https://github.com/wurmlab/NpSearch" target="_blank" NpSearch
|
54
|
+
| v#{NpSearch::VERSION}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: npsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ismail Moghul
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2015-09-
|
15
|
+
date: 2015-09-14 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: bundler
|
@@ -71,19 +71,19 @@ dependencies:
|
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: '1.4'
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
|
-
name:
|
74
|
+
name: slim
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|
76
76
|
requirements:
|
77
77
|
- - "~>"
|
78
78
|
- !ruby/object:Gem::Version
|
79
|
-
version: '
|
79
|
+
version: '3.0'
|
80
80
|
type: :runtime
|
81
81
|
prerelease: false
|
82
82
|
version_requirements: !ruby/object:Gem::Requirement
|
83
83
|
requirements:
|
84
84
|
- - "~>"
|
85
85
|
- !ruby/object:Gem::Version
|
86
|
-
version: '
|
86
|
+
version: '3.0'
|
87
87
|
description: Search for Neuropeptides based solely on the common neuropeptide markers
|
88
88
|
(e.g. signal peptide, dibasic cleavage sites etc.) i.e. not based on homology to
|
89
89
|
known neuropeptides.
|
@@ -104,12 +104,14 @@ files:
|
|
104
104
|
- bin/npsearch
|
105
105
|
- lib/npsearch.rb
|
106
106
|
- lib/npsearch/arg_validator.rb
|
107
|
-
- lib/npsearch/
|
107
|
+
- lib/npsearch/output.rb
|
108
108
|
- lib/npsearch/pool.rb
|
109
|
+
- lib/npsearch/scoresequence.rb
|
109
110
|
- lib/npsearch/sequence.rb
|
110
111
|
- lib/npsearch/signalp.rb
|
111
112
|
- lib/npsearch/version.rb
|
112
113
|
- npsearch.gemspec
|
114
|
+
- templates/contents.slim
|
113
115
|
- test/files/1_protein.fa
|
114
116
|
- test/files/2_orf.fa
|
115
117
|
- test/files/3_signalp_out.txt
|
data/lib/npsearch/logger.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
require 'logger'
|
2
|
-
|
3
|
-
module NpSearch
|
4
|
-
# Extend stdlib's Logger class for custom initialization and log format.
|
5
|
-
class Logger < Logger
|
6
|
-
def initialize(dev, verbose = false)
|
7
|
-
super dev
|
8
|
-
self.level = verbose ? DEBUG : INFO
|
9
|
-
self.formatter = proc { |_, datetime, _, msg| "#{datetime}: #{msg}\n" }
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|