npsearch 0.0.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/npsearch +30 -121
- data/lib/npsearch.rb +18 -18
- data/lib/npsearch/output.rb +29 -0
- data/lib/npsearch/scoresequence.rb +109 -0
- data/lib/npsearch/sequence.rb +28 -16
- data/lib/npsearch/signalp.rb +2 -21
- data/lib/npsearch/version.rb +1 -1
- data/npsearch.gemspec +1 -1
- data/templates/contents.slim +54 -0
- metadata +8 -6
- data/lib/npsearch/logger.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97d1818dc3219f272bd1acb116dbed684e6b0dbe
|
4
|
+
data.tar.gz: dc03e0976e737f28ab93f6899d21278601dd6c4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a95520149d8ced28c465cfafa55eaefd3cec1c3cd0cc94796931b8017f940ee8df681ea254bbc063464e9ed67d713bca34da85ff8ffed84e0c5c576cd93ec85
|
7
|
+
data.tar.gz: 88da9e2e6a2bcbba268ecfbf3f7bc1ce4351d33430826bd6571b8ecdb74f087308cb4ed86f69ff8f80f3899420a252cb822a4d656b3c025b736a3ca0f9ff39c0
|
data/bin/npsearch
CHANGED
@@ -9,58 +9,48 @@ opt = {}
|
|
9
9
|
optparse = OptionParser.new do |opts|
|
10
10
|
opts.banner = <<Banner
|
11
11
|
|
12
|
-
* Usage: npsearch [Options] -i [Input File]
|
12
|
+
* Usage: npsearch [Options] -i [Input File]
|
13
13
|
|
14
14
|
* Mandatory Options:
|
15
15
|
|
16
16
|
Banner
|
17
17
|
|
18
18
|
opt[:input_file] = nil
|
19
|
-
opts.on('-i', '--input [file]',
|
19
|
+
opts.on('-i', '--input [file]',
|
20
|
+
'Path to the input fasta file') do |f|
|
20
21
|
opt[:input_file] = f
|
21
22
|
end
|
22
23
|
|
23
24
|
opts.separator ''
|
24
25
|
opts.separator '* Optional Options:'
|
25
26
|
|
26
|
-
opt[:
|
27
|
-
opts.on('-
|
28
|
-
'
|
29
|
-
'
|
30
|
-
'
|
31
|
-
|
32
|
-
' between each query and putting the motif query in speech marks',
|
33
|
-
' e.g. "KR|RR|R..R"',
|
34
|
-
' Advanced Users: Regular expressions are supported.') do |motif|
|
35
|
-
opt[:motif] = motif
|
27
|
+
opt[:signalp_path] = File.join(ENV['HOME'], 'signalp/signalp')
|
28
|
+
opts.on('-s', '--signalp_path', String,
|
29
|
+
'The full path to the signalp script. This can be downloaded from',
|
30
|
+
' CBS. See https://www.github.com/wurmlab/NpSearch for more',
|
31
|
+
' information') do |p|
|
32
|
+
opt[:signalp_path] = p
|
36
33
|
end
|
37
34
|
|
38
|
-
opt[:
|
39
|
-
opts.on('-
|
40
|
-
'
|
41
|
-
'
|
42
|
-
|
35
|
+
opt[:usearch_path] = File.join(ENV['HOME'], 'bin/uclust')
|
36
|
+
opts.on('-u', '--usearch_path', String,
|
37
|
+
'The full path to the usearch binary. This script can be downloaded',
|
38
|
+
' from .... See https://www.github.com/wurmlab/NpSearch for more',
|
39
|
+
' information') do |p|
|
40
|
+
opt[:usearch_path] = p
|
43
41
|
end
|
44
42
|
|
45
|
-
opt[:
|
46
|
-
opts.on('-
|
47
|
-
'
|
48
|
-
|
49
|
-
" (Version 4.x), downloadable from CBS. If this argument isn't ",
|
50
|
-
' suplied, then NpSearch will try to run a local version of the',
|
51
|
-
' Signal P script.') do |signalp_file|
|
52
|
-
opt[:signalp_file] = signalp_file
|
43
|
+
opt[:num_threads] = 1
|
44
|
+
opts.on('-n', '--num_threads', Integer,
|
45
|
+
'The number of threads to use when analysing the input file') do |n|
|
46
|
+
opt[:num_threads] = n
|
53
47
|
end
|
54
48
|
|
55
|
-
opt[:
|
56
|
-
opts.on('-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
opt[:verbose] = false
|
61
|
-
opts.on('-v', '--verbose', 'Provides more information on each step taken',
|
62
|
-
' in this program.') do
|
63
|
-
opt[:verbose] = true
|
49
|
+
opt[:orf_min_length] = 10
|
50
|
+
opts.on('-m', '--orf_min_length N', Integer,
|
51
|
+
'The minimum length of a potential neuropeptide precursor.',
|
52
|
+
' Default: 30') do |n|
|
53
|
+
opt[:orf_min_length] = n
|
64
54
|
end
|
65
55
|
|
66
56
|
opts.on('-h', '--help', 'Display this screen') do
|
@@ -68,98 +58,17 @@ Banner
|
|
68
58
|
exit
|
69
59
|
end
|
70
60
|
|
71
|
-
opts.on('--version', 'Shows version') do
|
61
|
+
opts.on('-v', '--version', 'Shows version') do
|
72
62
|
puts NpSearch::VERSION
|
73
63
|
exit
|
74
64
|
end
|
75
65
|
end
|
76
66
|
optparse.parse!
|
77
67
|
|
68
|
+
# Temporary hard coding my defaults...
|
69
|
+
opt[:num_threads] = 8
|
70
|
+
opt[:signalp_path] = '/Volumes/Data/data/programs/signalp-4.1/signalp'
|
71
|
+
opt[:usearch_path] = '/Volumes/Data/data/programs/bin/usearch'
|
72
|
+
|
78
73
|
NpSearch.init(opt)
|
79
74
|
NpSearch.run
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
# ############# Argument Validation...##############
|
85
|
-
# arg_vldr = NpSearch::ArgValidators.new(opt[:verbose])
|
86
|
-
# input_type = arg_vldr.arg(opt[:motif], opt[:input], opt[:output_dir],
|
87
|
-
# opt[:cut_off], opt[:extract_orf], opt[:signalp_file],
|
88
|
-
# optparse.help)
|
89
|
-
|
90
|
-
# ############# General Validation...##############
|
91
|
-
# vldr = NpSearch::Validators.new
|
92
|
-
# vldr.output_dir(opt[:output_dir])
|
93
|
-
# if opt[:signalp_file].nil? && opt[:extract_orf] == false
|
94
|
-
# sp_dir = vldr.signalp_dir
|
95
|
-
# end
|
96
|
-
|
97
|
-
# ############# Converting input file to Bio::FastaFormat. #############
|
98
|
-
# input_read = NpSearch::Input.read(opt[:input], input_type)
|
99
|
-
|
100
|
-
# ############# Extract_ORF #############
|
101
|
-
# if input_type == 'genetic'
|
102
|
-
# # Translate Sequences in all 6 frames
|
103
|
-
# translated = NpSearch::Translation.translate(input_read)
|
104
|
-
# translated.to_fasta('translated seq.', "#{opt[:output_dir]}/1_protein.fa")
|
105
|
-
# # Extract all possible ORF that are longer than the ORF_min_length
|
106
|
-
# orf = NpSearch::Translation.extract_orf(translated, opt[:cut_off])
|
107
|
-
# orf.to_fasta('Open Reading Frames', "#{opt[:output_dir]}/2_orf.fa")
|
108
|
-
|
109
|
-
# if opt[:extract_orf]
|
110
|
-
# puts "\nSuccess: All output files created in the directory:" \
|
111
|
-
# "#{opt[:output_dir]}'.\n "
|
112
|
-
# exit
|
113
|
-
# end
|
114
|
-
# end
|
115
|
-
|
116
|
-
# ############# Setting up more variables...##############
|
117
|
-
# if opt[:motif] == 'neuro_clv'
|
118
|
-
# motif = 'KK|KR|RR|' \
|
119
|
-
# 'R..R|R....R|R......R|H..R|H....R|H......R|K..R|K....R|K......R'
|
120
|
-
# else
|
121
|
-
# motif = opt[:motif]
|
122
|
-
# end
|
123
|
-
# vldr.motif_type(motif)
|
124
|
-
|
125
|
-
# if input_type == 'genetic'
|
126
|
-
# sp_input_file = "#{opt[:output_dir]}/2_orf.fa"
|
127
|
-
# sp_hash = orf
|
128
|
-
# file_number = 3
|
129
|
-
# else # i.e. if the input is protein
|
130
|
-
# sp_input_file = opt[:input]
|
131
|
-
# sp_hash = input_read
|
132
|
-
# file_number = 1
|
133
|
-
# end
|
134
|
-
|
135
|
-
# if opt[:signalp_file].nil?
|
136
|
-
# sp_out_file = "#{opt[:output_dir]}/#{file_number}_signalp_out.txt"
|
137
|
-
# file_number += 1
|
138
|
-
# NpSearch::Signalp.signalp(sp_dir, sp_input_file, sp_out_file)
|
139
|
-
# else
|
140
|
-
# sp_out_file = opt[:signalp_file]
|
141
|
-
# file_number = 1
|
142
|
-
# end
|
143
|
-
|
144
|
-
# ############# Signal P Results file Validation #############
|
145
|
-
# vldr.sp_results(sp_out_file)
|
146
|
-
|
147
|
-
# ############# Extract sequences with a signal peptide #############
|
148
|
-
# secretome = NpSearch::Analysis.parse(sp_out_file, sp_hash, motif)
|
149
|
-
# secretome.to_fasta('secretome file',
|
150
|
-
# "#{opt[:output_dir]}/#{file_number}_secretome.fa")
|
151
|
-
# file_number += 1
|
152
|
-
|
153
|
-
# ############# Remove any duplicate data #############
|
154
|
-
# flattened_seq = NpSearch::Analysis.flattener(secretome)
|
155
|
-
|
156
|
-
# ############# Creating Output Files #############
|
157
|
-
# flattened_seq.to_fasta('fasta output file',
|
158
|
-
# "#{opt[:output_dir]}/#{file_number}_output.fa")
|
159
|
-
# flattened_seq.to_html(motif,
|
160
|
-
# "#{opt[:output_dir]}/#{file_number}_output.html")
|
161
|
-
|
162
|
-
# ############# Success #############
|
163
|
-
# puts # a blank line.
|
164
|
-
# puts "Success: All output files created in the directory:'#{opt[:output_dir]}'."
|
165
|
-
# puts # a blank line
|
data/lib/npsearch.rb
CHANGED
@@ -2,38 +2,35 @@ require 'bio'
|
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
4
|
# require 'npsearch/arg_validator'
|
5
|
-
require 'npsearch/
|
5
|
+
require 'npsearch/output'
|
6
|
+
require 'npsearch/pool'
|
7
|
+
require 'npsearch/scoresequence'
|
6
8
|
require 'npsearch/sequence'
|
7
9
|
require 'npsearch/signalp'
|
8
|
-
require 'npsearch/pool'
|
9
10
|
|
10
11
|
# Top level module / namespace.
|
11
12
|
module NpSearch
|
12
13
|
class <<self
|
13
|
-
MIN_ORF_SIZE =
|
14
|
+
MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
|
14
15
|
|
15
16
|
attr_accessor :opt
|
16
17
|
attr_accessor :sequences
|
17
|
-
|
18
|
-
def logger
|
19
|
-
@logger ||= Logger.new(STDERR, @opt[:verbose])
|
20
|
-
end
|
18
|
+
attr_reader :sorted_sequences
|
21
19
|
|
22
20
|
def init(opt)
|
23
21
|
# @opt = args_validation(opt)
|
24
22
|
@opt = opt
|
25
23
|
@sequences = []
|
26
|
-
@
|
24
|
+
@sorted_sequences = nil
|
27
25
|
@opt[:type] = guess_sequence_type
|
28
|
-
@opt[:signalp_path] = '/Volumes/Data/programs/signalp-4.1/signalp'
|
29
26
|
@pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
|
30
27
|
end
|
31
28
|
|
32
29
|
def run
|
33
30
|
iterate_input_file
|
34
|
-
|
35
|
-
|
36
|
-
@
|
31
|
+
@sorted_sequences = @sequences.sort_by(&:score).reverse
|
32
|
+
Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
|
33
|
+
Output.to_html(@opt[:input_file])
|
37
34
|
end
|
38
35
|
|
39
36
|
private
|
@@ -60,7 +57,10 @@ module NpSearch
|
|
60
57
|
|
61
58
|
def initialise_protein_seq(id, seq)
|
62
59
|
sp = Signalp.analyse_sequence(seq)
|
63
|
-
|
60
|
+
return unless sp[:sp] == 'Y'
|
61
|
+
seq = Sequence.new(id, seq, sp)
|
62
|
+
ScoreSequence.run(seq)
|
63
|
+
@sequences << seq
|
64
64
|
end
|
65
65
|
|
66
66
|
def initialise_transcriptomic_seq(id, naseq)
|
@@ -72,15 +72,15 @@ module NpSearch
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def initialise_orfs(id, orfs, frame)
|
75
|
-
idx = 0
|
76
75
|
orfs.each do |orf|
|
77
76
|
sp = Signalp.analyse_sequence(orf)
|
78
77
|
next if sp[:sp] == 'N'
|
79
|
-
seq
|
80
|
-
seq
|
81
|
-
seq.orf_index = idx
|
78
|
+
seq = Sequence.new(id, orf, sp, frame)
|
79
|
+
ScoreSequence.run(seq)
|
82
80
|
@sequences << seq
|
83
|
-
|
81
|
+
# The remaining ORF in this frame are simply shorter versions of the
|
82
|
+
# same orf so break loop once signal peptide is found.
|
83
|
+
break if sp[:sp] == 'Y'
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'slim'
|
2
|
+
|
3
|
+
module NpSearch
  # Writes the result files (HTML report and FASTA) next to the input file.
  class Output
    class << self
      # Renders templates/contents.slim with the NpSearch module as the
      # template scope and writes the result to "<input_file>.out.html".
      #
      # @param input_file [String] path used as the prefix of the output file
      def to_html(input_file)
        template_file = File.expand_path(File.join(__FILE__, '../../../',
                                                   'templates/contents.slim'))
        template_source = File.read(template_file)
        rendered = Slim::Template.new { template_source }.render(NpSearch)
        File.open("#{input_file}.out.html", 'w') do |out|
          out.puts rendered
        end
      end

      # Writes one FASTA record per sequence to "<input_file>.out.fa".
      # Nucleotide-derived records carry the translation frame in the header;
      # any other input_type than :protein / :nucleotide writes nothing.
      #
      # @param input_file [String] path used as the prefix of the output file
      # @param sorted_sequences [Enumerable] objects responding to id, signalp,
      #   seq and (for :nucleotide) translated_frame
      # @param input_type [Symbol] :protein or :nucleotide
      def to_fasta(input_file, sorted_sequences, input_type)
        File.open("#{input_file}.out.fa", 'w') do |out|
          sorted_sequences.each do |entry|
            case input_type
            when :protein
              out.puts ">#{entry.id}\n#{entry.signalp}#{entry.seq}"
            when :nucleotide
              out.puts ">#{entry.id}-(frame:#{entry.translated_frame})"
              out.puts "#{entry.signalp}#{entry.seq}"
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module NpSearch
  # Heuristic scorer for candidate neuropeptide precursors.
  #
  # ScoreSequence.run(sequence) splits the sequence at putative cleavage
  # sites and adds to sequence.score for: cleavage sites, C-terminal
  # glycines, clusters of similar peptides (via usearch) and acidic spacers.
  #
  # The sequence object must respond to: seq, score, score=,
  # potential_cleaved_nps and potential_cleaved_nps=.
  #
  # NOTE(review): the sequence being scored is held in @sequence on the
  # singleton, so overlapping calls to run from several threads would share
  # that state -- confirm callers score one sequence at a time.
  class ScoreSequence
    class << self
      DI_CLV = 'KR|RR|KK'
      MONO_NP_CLV_2 = '[KR]..R'
      MONO_NP_CLV_4 = '[KR]....R'
      MONO_NP_CLV_6 = '[KR]......R'
      NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
               "(#{MONO_NP_CLV_6})"

      # Matches the shortest run of word characters that sits between two
      # cleavage sites (or the start / end of a line). The four groups inside
      # each lookaround plus the peptide itself give nine captures per match.
      NP_SPLIT_REGEX = /(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i

      # Names for the nine captures of NP_SPLIT_REGEX, in capture order.
      NP_FIELDS = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
                     di_clv_end mono_2_clv_end mono_4_clv_end
                     mono_6_clv_end).map(&:to_sym).freeze

      # Scores +sequence+ in place (mutates its score and
      # potential_cleaved_nps attributes).
      #
      # @param sequence [#seq, #score, #score=, #potential_cleaved_nps=]
      def run(sequence)
        @sequence = sequence
        split_into_neuropeptides
        count_np_cleavage_sites
        count_c_terminal_glycines
        np_similarity
        acidic_spacers
      end

      private

      # Stores one hash per putative peptide (keys: NP_FIELDS) on the sequence.
      def split_into_neuropeptides
        matches = @sequence.seq.scan(NP_SPLIT_REGEX)
        @sequence.potential_cleaved_nps = matches.map do |captures|
          Hash[NP_FIELDS.zip(captures)]
        end
      end

      # Adds the cleavage-site bonus for the site following each peptide.
      def count_np_cleavage_sites
        @sequence.potential_cleaved_nps.each do |e|
          count_dibasic_np_clv(e[:di_clv_end])
          count_mono_basic_np_clv(e[:mono_2_clv_end], e[:mono_4_clv_end],
                                  e[:mono_6_clv_end])
        end
      end

      # KR scores higher (0.09) than RR / KK (0.05); anything else adds nothing.
      def count_dibasic_np_clv(dibasic_clv)
        case dibasic_clv
        when 'KR'
          @sequence.score += 0.09
        when 'RR', 'KK'
          @sequence.score += 0.05
        end
      end

      # Adds 0.02 when any of the three monobasic patterns matched.
      def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
        return if mono_2_clv.nil? && mono_4_clv.nil? && mono_6_clv.nil?
        @sequence.score += 0.02
      end

      # Rewards peptides ending in glycine: 0.25 when the peptide ends in G
      # and is followed by a KR site, otherwise 0.10 for a G / GK / GR ending.
      def count_c_terminal_glycines
        @sequence.potential_cleaved_nps.each do |e|
          if e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
            @sequence.score += 0.25
          elsif e[:np] =~ /G$|GK$|GR$/
            @sequence.score += 0.10
          end
        end
      end

      # Adds 0.10 for every peptide in which more than half of the residues
      # are D or E.
      def acidic_spacers
        @sequence.potential_cleaved_nps.each do |e|
          # fdiv keeps the fractional part; Integer#/ would truncate the ratio
          # to 0 for anything short of a fully acidic peptide.
          fraction_acidic = e[:np].count('DE').fdiv(e[:np].length)
          @sequence.score += 0.10 if fraction_acidic > 0.5
        end
      end

      # Clusters the peptides with usearch and adds 0.15 per member of every
      # cluster that holds more than one sequence. Only lines starting with
      # 'C' are read; the fourth tab-separated field is taken as the cluster
      # size (presumably the UCLUST .uc cluster records -- confirm).
      def np_similarity
        run_uclust.each_line do |line|
          next unless line.start_with?('C')
          no_of_seq_in_cluster = line.split(/\t/)[3].to_i
          next unless no_of_seq_in_cluster > 1
          @sequence.score += (0.15 * no_of_seq_in_cluster)
        end
      end

      # Writes the peptides to a temporary FASTA file, runs `usearch` (looked
      # up on PATH) at 50% identity and returns the text of the .uc output.
      # Both temporary files are closed and removed afterwards.
      def run_uclust
        f = Tempfile.new('uclust')
        fo = Tempfile.new('uclust_out')
        write_sequence_content_to_tempfile(f)
        `usearch -cluster_fast #{f.path} -id 0.5 -uc #{fo.path} >/dev/null 2>&1`
        IO.read(fo.path)
      ensure
        # Either tempfile may be nil if its creation raised.
        [f, fo].compact.each do |tmp|
          tmp.close unless tmp.closed?
          tmp.unlink
        end
      end

      # Writes every peptide as a FASTA record (>seq0, >seq1, ...) and closes
      # the tempfile so the external program sees the flushed content.
      def write_sequence_content_to_tempfile(tempfile)
        records = @sequence.potential_cleaved_nps.each_with_index.map do |e, i|
          ">seq#{i}\n#{e[:np]}\n"
        end
        tempfile.write(records.join)
        tempfile.close
      end
    end
  end
end
|
data/lib/npsearch/sequence.rb
CHANGED
@@ -1,25 +1,37 @@
|
|
1
|
-
require 'forwardable'
|
2
|
-
|
3
1
|
module NpSearch
  # Holds one candidate sequence, split into its signal peptide and the
  # remainder, together with an HTML-highlighted rendering and its score.
  class Sequence
    DI_NP_CLV = 'KR|KK|RR'
    MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'

    attr_reader :id, :signalp, :seq, :html_seq, :signalp_output,
                :translated_frame
    attr_accessor :score, :potential_cleaved_nps

    # @param id [String] identifier of the sequence
    # @param seq [String] full amino-acid sequence, signal peptide included
    # @param signalp_output [Hash] must provide :ymax_pos; presumably SignalP's
    #   1-based position of the first residue after the signal peptide --
    #   confirm against Signalp.analyse_sequence
    # @param frame [Object, nil] translation frame, nil when not applicable
    def initialize(id, seq, signalp_output, frame = nil)
      @id = id
      @signalp_output = signalp_output
      # 0-based index of the first residue after the signal peptide. Clamped
      # at 0 so a missing / zero position cannot index from the tail.
      sp_cleavage_site_idx = [signalp_output[:ymax_pos].to_i - 1, 0].max
      # Exclusive range: with an index of 0 this yields '' instead of the
      # whole sequence (which 0..(idx - 1) == 0..-1 would return).
      @signalp = seq[0...sp_cleavage_site_idx]
      @seq = seq[sp_cleavage_site_idx..-1]
      @html_seq = format_seq_for_html
      @translated_frame = frame
      @score = 0
      @potential_cleaved_nps = nil
    end

    # Builds the HTML for the sequence: cysteines, dibasic cleavage sites, the
    # final R of monobasic cleavage motifs and a glycine directly before a
    # dibasic site are each wrapped in a <span> with its own class.
    #
    # @return [String] signal peptide span followed by the annotated sequence
    def format_seq_for_html
      seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
      seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
      # Tag the end of each monobasic motif with a marker so that only its
      # final R is wrapped in the next step.
      seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::')
      seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
      seq.gsub!('G<span class=np_clv>',
                '<span class=glycine>G</span><span class=np_clv>')
      "<span class=signalp>#{@signalp}</span><span class=seq>#{seq}</span>"
    end
  end
end
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'csv'
|
2
1
|
require 'forwardable'
|
3
2
|
require 'tempfile'
|
4
3
|
|
@@ -7,28 +6,10 @@ module NpSearch
|
|
7
6
|
class Signalp
|
8
7
|
class << self
|
9
8
|
extend Forwardable
|
10
|
-
def_delegators NpSearch, :opt
|
11
|
-
|
12
|
-
def run
|
13
|
-
end
|
14
|
-
|
15
|
-
def analyse_file(file)
|
16
|
-
sp_out = []
|
17
|
-
sp_headers = %w(name cmax cmax-pos ymax ymax-pos smax smax-pos smean d
|
18
|
-
sp dmaxcut networks)
|
19
|
-
output = `#{opt[:signalp_path]} -t euk -f short -U 0.34 -u 0.34 \
|
20
|
-
#{file}`
|
21
|
-
lines = CSV.parse(output.gsub(/ +/, ','), col_sep: ',',
|
22
|
-
skip_lines: /^#/,
|
23
|
-
header_converters: :symbol,
|
24
|
-
converters: :all,
|
25
|
-
headers: sp_headers)
|
26
|
-
lines.each { |line| sp_out << line.to_hash }
|
27
|
-
sp_out
|
28
|
-
end
|
9
|
+
def_delegators NpSearch, :opt
|
29
10
|
|
30
11
|
def analyse_sequence(seq)
|
31
|
-
sp_headers = %w(name cmax
|
12
|
+
sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
|
32
13
|
sp dmaxcut networks)
|
33
14
|
f = Tempfile.new('signalp')
|
34
15
|
f.write(">seq\n#{seq}")
|
data/lib/npsearch/version.rb
CHANGED
data/npsearch.gemspec
CHANGED
@@ -0,0 +1,54 @@
|
|
1
|
+
doctype html
|
2
|
+
html lang="en"
|
3
|
+
head
|
4
|
+
meta charset="utf-8"
|
5
|
+
meta content="IE=edge" http-equiv="X-UA-Compatible"
|
6
|
+
meta content="width=device-width, initial-scale=1" name="viewport"
|
7
|
+
meta content="NpSearch | Identify Novel Neuropeptides" name="description"
|
8
|
+
meta content="Wurmlab" name="author"
|
9
|
+
title NpSearch | Identify Novel Neuropeptides
|
10
|
+
css:
|
11
|
+
html { position: relative; min-height: 100%; }
|
12
|
+
body {margin:0 0 100px; background-color:#F5F5F5; font-family:"Helvetica Neue", Helvetica, Arial, sans-serif; font-size:15px; line-height:1.42857143; color:#2c3e50;}
|
13
|
+
.container {margin-right:auto; margin-left:auto; padding-left:15px; padding-right:15px; max-width:1170px; width:95%;}
|
14
|
+
footer {bottom:0; width:100%; margin:0 auto; position:absolute; height:100px; overflow:hidden; border-top:2px solid #DBDBDB; color:#b4bcc2;text-align:center;}
|
15
|
+
a{color:#18bc9c; text-decoration:none}a:hover,a:focus{color:#18bc9c; text-decoration:underline}
|
16
|
+
p {margin:0 0 10.5px}
|
17
|
+
.id {font-weight:bold;}
|
18
|
+
.sequence {word-break:break-all; font-family:Courier New, Courier, Mono;}
|
19
|
+
.signalp {color:#007AC0; font-weight:bold;}
|
20
|
+
.np_clv {color:#00B050; font-weight:bold;}
|
21
|
+
.mono_np_clv {color:#00B050;}
|
22
|
+
.glycine {color:#FF0000; font-weight:bold;}
|
23
|
+
.cysteine {text-decoration: underline; font-weight:bold;}
|
24
|
+
body
|
25
|
+
.container
|
26
|
+
h1 NpSearch Results
|
27
|
+
.results
|
28
|
+
- @sorted_sequences.each do |seq|
|
29
|
+
p.sequence
|
30
|
+
span.id
|
31
|
+
- if @opt[:type] == :protein
|
32
|
+
| >#{seq.id}
|
33
|
+
- elsif @opt[:type] == :nucleotide
|
34
|
+
| >#{seq.id}-(frame:#{seq.translated_frame})
|
35
|
+
br
|
36
|
+
span.seq== seq.html_seq
|
37
|
+
br
|
38
|
+
br
|
39
|
+
br
|
40
|
+
footer
|
41
|
+
p
|
42
|
+
| Please cite "Moghul I, Rowe M, Priyam A, Elphick M & Wurm Y
|
43
|
+
em
|
44
|
+
| (in prep)
|
45
|
+
| NpSearch: A tool to identify novel neuropeptides"
|
46
|
+
br
|
47
|
+
| Developed at
|
48
|
+
a href="https://wurmlab.github.io" target="_blank" Wurm Lab
|
49
|
+
| ,
|
50
|
+
a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
|
51
|
+
br
|
52
|
+
| This page was created by
|
53
|
+
a href="https://github.com/wurmlab/NpSearch" target="_blank" NpSearch
|
54
|
+
| v#{NpSearch::VERSION}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: npsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ismail Moghul
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2015-09-
|
15
|
+
date: 2015-09-14 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: bundler
|
@@ -71,19 +71,19 @@ dependencies:
|
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: '1.4'
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
|
-
name:
|
74
|
+
name: slim
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|
76
76
|
requirements:
|
77
77
|
- - "~>"
|
78
78
|
- !ruby/object:Gem::Version
|
79
|
-
version: '
|
79
|
+
version: '3.0'
|
80
80
|
type: :runtime
|
81
81
|
prerelease: false
|
82
82
|
version_requirements: !ruby/object:Gem::Requirement
|
83
83
|
requirements:
|
84
84
|
- - "~>"
|
85
85
|
- !ruby/object:Gem::Version
|
86
|
-
version: '
|
86
|
+
version: '3.0'
|
87
87
|
description: Search for Neuropeptides based solely on the common neuropeptide markers
|
88
88
|
(e.g. signal peptide, dibasic cleavage sites etc.) i.e. not based on homology to
|
89
89
|
known neuropeptides.
|
@@ -104,12 +104,14 @@ files:
|
|
104
104
|
- bin/npsearch
|
105
105
|
- lib/npsearch.rb
|
106
106
|
- lib/npsearch/arg_validator.rb
|
107
|
-
- lib/npsearch/
|
107
|
+
- lib/npsearch/output.rb
|
108
108
|
- lib/npsearch/pool.rb
|
109
|
+
- lib/npsearch/scoresequence.rb
|
109
110
|
- lib/npsearch/sequence.rb
|
110
111
|
- lib/npsearch/signalp.rb
|
111
112
|
- lib/npsearch/version.rb
|
112
113
|
- npsearch.gemspec
|
114
|
+
- templates/contents.slim
|
113
115
|
- test/files/1_protein.fa
|
114
116
|
- test/files/2_orf.fa
|
115
117
|
- test/files/3_signalp_out.txt
|
data/lib/npsearch/logger.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
require 'logger'
|
2
|
-
|
3
|
-
module NpSearch
|
4
|
-
# Extend stdlib's Logger class for custom initialization and log format.
|
5
|
-
class Logger < Logger
|
6
|
-
def initialize(dev, verbose = false)
|
7
|
-
super dev
|
8
|
-
self.level = verbose ? DEBUG : INFO
|
9
|
-
self.formatter = proc { |_, datetime, _, msg| "#{datetime}: #{msg}\n" }
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|