npsearch 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af22531e55865ab286dd6599917196765d72af12
|
4
|
+
data.tar.gz: 5a3bf459332ff8bc70c3c6e431cae9e09fe0494c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 899fed317d7ceb7a62d52fb2b3e0e24e835f630c058a9dacf05e267019a900c5c2357588e9f1bdd674731eb951a44f16d5898747efb872e6ae1ceaf5efb8acf4
|
7
|
+
data.tar.gz: 0aab6be7e635dd63b2e8e2d4d5eda128f7f39b41977f5b08fb090408dd612ac98d41d3fe60a086ea5a16d2cff56bccbce79b8168c3f2af2e3fff222b1657b908
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -32,8 +32,9 @@ NpSearch orders the results based on the following characteristics:
|
|
32
32
|
### Installation Requirements
|
33
33
|
* Ruby (>= 2.0.0)
|
34
34
|
* SignalP 4.1 (Available from [here](http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?signalp))
|
35
|
-
*
|
36
|
-
*
|
35
|
+
* CD-HIT (Available from [here](http://weizhongli-lab.org/cd-hit/) - Suggested Installation via [Homebrew](http://brew.sh) or [Linuxbrew](http://linuxbrew.sh) - `brew install homebrew/science/cd-hit`)
|
36
|
+
* EMBOSS (Available from [here](http://emboss.sourceforge.net) - Suggested Installation via [Homebrew](http://brew.sh) or [Linuxbrew](http://linuxbrew.sh) - `brew install homebrew/science/emboss`)
|
37
|
+
|
37
38
|
|
38
39
|
## Installation
|
39
40
|
Simply run the following command in the terminal.
|
data/Rakefile
CHANGED
@@ -1,14 +1,23 @@
|
|
1
|
-
require 'bundler/gem_tasks'
|
2
1
|
require 'rake/testtask'
|
3
2
|
|
4
3
|
task default: [:build]
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
|
5
|
+
desc 'Builds and installs'
|
6
|
+
task install: [:build] do
|
7
|
+
require_relative 'lib/npsearch/version'
|
8
|
+
sh "gem install ./npsearch-#{NpSearch::VERSION}.gem"
|
9
|
+
end
|
10
|
+
|
11
|
+
desc 'Runs tests, generates documentation, builds gem (default)'
|
12
|
+
task build: [:test] do
|
13
|
+
sh 'gem build npsearch.gemspec'
|
8
14
|
end
|
9
15
|
|
16
|
+
desc 'Runs tests'
|
10
17
|
task :test do
|
11
18
|
Rake::TestTask.new do |t|
|
12
|
-
t.
|
19
|
+
t.libs.push 'lib'
|
20
|
+
t.test_files = FileList['test/test_*.rb']
|
21
|
+
t.verbose = true
|
13
22
|
end
|
14
23
|
end
|
data/bin/npsearch
CHANGED
@@ -1,56 +1,53 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'optparse'
|
3
|
-
|
4
|
-
require '
|
5
|
-
require 'npsearch/arg_validator'
|
6
|
-
require 'npsearch/version'
|
3
|
+
require 'English'
|
4
|
+
require 'tempfile'
|
7
5
|
|
8
6
|
opt = {}
|
9
7
|
optparse = OptionParser.new do |opts|
|
10
8
|
opts.banner = <<Banner
|
9
|
+
* Description: A tool to identify novel neuropeptides.
|
11
10
|
|
12
|
-
* Usage: npsearch [Options]
|
13
|
-
|
14
|
-
* Mandatory Options:
|
11
|
+
* Usage: npsearch [Options] [Input File]
|
15
12
|
|
13
|
+
* Options
|
16
14
|
Banner
|
17
15
|
|
18
|
-
opt[:
|
19
|
-
opts.on('-
|
20
|
-
'Path to the input fasta file') do |f|
|
21
|
-
opt[:input_file] = f
|
22
|
-
end
|
23
|
-
|
24
|
-
opts.separator ''
|
25
|
-
opts.separator '* Optional Options:'
|
26
|
-
|
27
|
-
opt[:signalp_path] = File.join(ENV['HOME'], 'signalp/signalp')
|
28
|
-
opts.on('-s', '--signalp_path', String,
|
16
|
+
opt[:signalp_path] = 'signalp'
|
17
|
+
opts.on('-s', '--signalp_path path_to_signalp',
|
29
18
|
'The full path to the signalp script. This can be downloaded from',
|
30
19
|
' CBS. See https://www.github.com/wurmlab/NpSearch for more',
|
31
20
|
' information') do |p|
|
32
21
|
opt[:signalp_path] = p
|
33
22
|
end
|
34
23
|
|
35
|
-
opt[:
|
36
|
-
|
37
|
-
|
38
|
-
'
|
39
|
-
'
|
40
|
-
|
24
|
+
opt[:temp_dir] = File.join(Dir.pwd, '.temp',
|
25
|
+
Dir::Tmpname.make_tmpname('', nil))
|
26
|
+
opts.on('-d', '--temp_dir path_to_temp_dir',
|
27
|
+
'The full path to the temp dir. NpSearch will create the folder and',
|
28
|
+
' then delete the folder once it has finished using them.',
|
29
|
+
' Default: Hidden folder in the current working dirctory') do |p|
|
30
|
+
opt[:temp_dir] = p
|
41
31
|
end
|
42
32
|
|
43
33
|
opt[:num_threads] = 1
|
44
|
-
opts.on('-n', '--num_threads', Integer,
|
34
|
+
opts.on('-n', '--num_threads num_of_threads', Integer,
|
45
35
|
'The number of threads to use when analysing the input file') do |n|
|
46
36
|
opt[:num_threads] = n
|
47
37
|
end
|
48
38
|
|
49
|
-
opt[:
|
50
|
-
opts.on('-m', '--
|
39
|
+
opt[:min_orf_length] = 30
|
40
|
+
opts.on('-m', '--min_orf_length N', Integer,
|
51
41
|
'The minimum length of a potential neuropeptide precursor.',
|
52
42
|
' Default: 30') do |n|
|
53
|
-
opt[:
|
43
|
+
opt[:min_orf_length] = n
|
44
|
+
end
|
45
|
+
|
46
|
+
opt[:max_seq_length] = 600
|
47
|
+
opts.on('-m', '--max_seq_length N', Integer,
|
48
|
+
'The maximum length of a potential neuropeptide precursor.',
|
49
|
+
' Default: 600') do |n|
|
50
|
+
opt[:max_seq_length] = n
|
54
51
|
end
|
55
52
|
|
56
53
|
opts.on('-h', '--help', 'Display this screen') do
|
@@ -59,16 +56,31 @@ Banner
|
|
59
56
|
end
|
60
57
|
|
61
58
|
opts.on('-v', '--version', 'Shows version') do
|
59
|
+
require 'npsearch/version'
|
62
60
|
puts NpSearch::VERSION
|
63
61
|
exit
|
64
62
|
end
|
65
63
|
end
|
66
|
-
|
64
|
+
begin
|
65
|
+
optparse.parse!
|
66
|
+
if ARGV.length > 1
|
67
|
+
$stderr.puts "Error: It seems that you have #{ARGV.length} input fasta" \
|
68
|
+
' files. Please ensure that you have a single input fasta' \
|
69
|
+
" file\n"
|
70
|
+
exit 1
|
71
|
+
elsif ARGV.empty?
|
72
|
+
$stderr.puts optparse
|
73
|
+
exit 1
|
74
|
+
end
|
75
|
+
rescue OptionParser::ParseError
|
76
|
+
$stderr.print 'Error: ' + $ERROR_INFO.to_s + "\n"
|
77
|
+
exit 1
|
78
|
+
end
|
67
79
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
80
|
+
opt[:input_file] = ARGV[0]
|
81
|
+
|
82
|
+
require 'npsearch'
|
83
|
+
require 'npsearch/arg_validator'
|
72
84
|
|
73
85
|
NpSearch.init(opt)
|
74
86
|
NpSearch.run
|
@@ -1,264 +1,93 @@
|
|
1
|
+
require 'bio'
|
2
|
+
# Top level module / namespace.
|
1
3
|
module NpSearch
|
2
|
-
class
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
# A class that validates the command line opts
|
5
|
+
class ArgumentsValidators
|
6
|
+
class << self
|
7
|
+
def run(opt)
|
8
|
+
assert_file_present('input fasta file', opt[:input_file])
|
9
|
+
assert_input_file_not_empty(opt[:input_file])
|
10
|
+
assert_input_file_probably_fasta(opt[:input_file])
|
11
|
+
opt[:type] = assert_input_sequence(opt[:input_file])
|
12
|
+
opt[:num_threads] = check_num_threads(opt[:num_threads])
|
13
|
+
assert_binaries('SignalP 4.1 Script', opt[:signalp_path])
|
14
|
+
opt
|
15
|
+
end
|
10
16
|
|
11
|
-
|
12
|
-
def arg(motif, input, output_dir, orf_min_length, extract_orf,
|
13
|
-
signalp_file, help_banner)
|
14
|
-
comp_arg(input, motif, output_dir, extract_orf, help_banner)
|
15
|
-
input_type = guess_input_type(input)
|
16
|
-
extract_orf_conflict(input_type, extract_orf)
|
17
|
-
input_sp_file_conflict(input_type, signalp_file)
|
18
|
-
orf_min_length(orf_min_length)
|
19
|
-
input_type
|
20
|
-
end
|
17
|
+
private
|
21
18
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
return unless input.nil? || (motif.nil? && extract_orf == false)
|
28
|
-
puts help_banner
|
29
|
-
exit
|
30
|
-
end
|
19
|
+
def assert_file_present(desc, file, exit_code = 1)
|
20
|
+
return if file && File.exist?(File.expand_path(file))
|
21
|
+
$stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
|
22
|
+
exit exit_code
|
23
|
+
end
|
31
24
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
25
|
+
def assert_input_file_not_empty(file)
|
26
|
+
return unless File.zero?(File.expand_path(file))
|
27
|
+
$stderr.puts "*** Error: The input_file (#{file})" \
|
28
|
+
' seems to be empty.'
|
29
|
+
exit 1
|
30
|
+
end
|
37
31
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
input_file_format(input_file)
|
43
|
-
sequences = []
|
44
|
-
File.open(input_file, 'r') do |file_stream|
|
45
|
-
file_stream.readlines[0..100].each do |line|
|
46
|
-
sequences << line.to_s unless line.match(/^>/)
|
32
|
+
def assert_input_file_probably_fasta(file)
|
33
|
+
File.open(file, 'r') do |f|
|
34
|
+
fasta = (f.readline[0] == '>') ? true : false
|
35
|
+
return fasta if fasta
|
47
36
|
end
|
37
|
+
$stderr.puts "*** Error: The input_file (#{file})" \
|
38
|
+
' does not seems to be a fasta file.'
|
39
|
+
exit 1
|
48
40
|
end
|
49
|
-
type = Bio::Sequence.new(sequences).guess(0.8)
|
50
|
-
if type == Bio::Sequence::NA
|
51
|
-
input_type = 'genetic'
|
52
|
-
elsif type == Bio::Sequence::AA
|
53
|
-
input_type = 'protein'
|
54
|
-
end
|
55
|
-
input_type
|
56
|
-
end
|
57
41
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
if File.zero?(input_file)
|
66
|
-
fail ArgumentError("Critical Error: The input file '#{input_file}'" \
|
67
|
-
' is empty.')
|
42
|
+
def assert_input_sequence(file)
|
43
|
+
type = type_of_sequences(file)
|
44
|
+
return type unless type.nil?
|
45
|
+
$stderr.puts '*** Error: The input files seems to contain a mixture of'
|
46
|
+
$stderr.puts ' both protein and nucleotide data.'
|
47
|
+
$stderr.puts ' Please correct this and try again.'
|
48
|
+
exit 1
|
68
49
|
end
|
69
|
-
unless File.probably_fasta?(input_file)
|
70
|
-
fail ArgumentError("Critical Error: The input file '#{input_file}'" \
|
71
|
-
' does not seem to be in fasta format. Only' \
|
72
|
-
' input files in fasta format are supported.')
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
# Ensures that the extract_orf option is only used with genetic data.
|
77
|
-
def extract_orf_conflict(input_type, extract_orf)
|
78
|
-
return unless input_type == 'protein' && extract_orf == true
|
79
|
-
fail ArgumentError('Usage Error: Conflicting arguments detected:' \
|
80
|
-
' Protein data detected within the input file,' \
|
81
|
-
' when using the Extract_ORF option (option' \
|
82
|
-
' "-e"). This option is only available when' \
|
83
|
-
' input file contains genetic data.')
|
84
|
-
end
|
85
|
-
|
86
|
-
# Ensures that the protein data (or open reading frames) are supplied as
|
87
|
-
# the input file when the signal p output file is passed.
|
88
|
-
def input_sp_file_conflict(input_type, signalp_file)
|
89
|
-
return unless input_type == 'genetic' && !signalp_file.nil?
|
90
|
-
fail ArgumentError('Usage Error: Conflicting arguments detected' \
|
91
|
-
': Genetic data detected within the input file' \
|
92
|
-
' when using the Signal P Input Option (Option' \
|
93
|
-
' "-s"). The Signal P input Option requires the' \
|
94
|
-
' input of two files: the Signal P Script Result' \
|
95
|
-
' files (at the "-s" option) and the protein' \
|
96
|
-
' data file used to run the Signal P Script.')
|
97
|
-
end
|
98
50
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
class Validators
|
109
|
-
# Checks for the presence of the output directory; if not found, it asks
|
110
|
-
# the user whether they want to create the output directory.
|
111
|
-
def output_dir(output_dir)
|
112
|
-
unless File.directory? output_dir # If output_dir doesn't exist
|
113
|
-
fail IOError, "\n\nThe output directory deoes not exist\n\n"
|
114
|
-
end
|
115
|
-
rescue IOError
|
116
|
-
puts # a blank line
|
117
|
-
puts 'The output directory does not exist.'
|
118
|
-
puts # a blank line
|
119
|
-
puts "The directory '#{output_dir}' will be created in this location."
|
120
|
-
puts 'Do you to continue? [y/n]'
|
121
|
-
print '> '
|
122
|
-
inp = $stdin.gets.chomp
|
123
|
-
until inp.downcase == 'n' || inp.downcase == 'y' || inp == ''
|
124
|
-
puts # a blank line
|
125
|
-
puts "The input: '#{inp}' is not recognised - 'y' or 'n' are the" \
|
126
|
-
' only recognisable inputs.'
|
127
|
-
puts 'Please try again.'
|
128
|
-
puts "The directory '#{output_dir}' will be created in this" \
|
129
|
-
' location.'
|
130
|
-
puts 'Do you to continue? [y/n]'
|
131
|
-
print '> '
|
132
|
-
inp = $stdin.gets.chomp
|
51
|
+
def type_of_sequences(file)
|
52
|
+
fasta_content = IO.binread(file)
|
53
|
+
# the first sequence does not need to have a fasta definition line
|
54
|
+
sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
|
55
|
+
# get all sequence types
|
56
|
+
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
57
|
+
.uniq.compact
|
58
|
+
return nil if sequence_types.empty?
|
59
|
+
sequence_types.first if sequence_types.length == 1
|
133
60
|
end
|
134
|
-
if inp.downcase == 'y' || inp == ''
|
135
|
-
FileUtils.mkdir_p "#{output_dir}"
|
136
|
-
puts 'Created output directory...'
|
137
|
-
elsif inp.downcase == 'n'
|
138
|
-
raise ArgumentError('Critical Error: An output directory is' \
|
139
|
-
' required; please create an output directory' \
|
140
|
-
' and then try again.')
|
141
|
-
end
|
142
|
-
end
|
143
61
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
else
|
151
|
-
begin
|
152
|
-
fail IOError('The Signal P Script directory cannot be found at' \
|
153
|
-
" the following location: '#{signalp_dir}/'.")
|
154
|
-
rescue IOError
|
155
|
-
puts # a blank line
|
156
|
-
puts 'Error: The Signal P Script directory cannot be found at the' \
|
157
|
-
" following location: '#{signalp_dir}/'."
|
158
|
-
puts # a blank line
|
159
|
-
puts 'Please enter the full path or a relative path to the Signal' \
|
160
|
-
' P Script directory (i.e. to the folder containing the' \
|
161
|
-
' Signal P script). Refer to the online tutorial for more help'
|
162
|
-
print '> '
|
163
|
-
inp = $stdin.gets.chomp
|
164
|
-
until (File.exist? "#{signalp_dir}/signalp") ||
|
165
|
-
(File.exist? "#{inp}/signalp")
|
166
|
-
puts # a blank line
|
167
|
-
puts 'The Signal P directory cannot be found at the following' \
|
168
|
-
" location: '#{inp}'"
|
169
|
-
puts 'Please enter the full path or a relative path to the Signal' \
|
170
|
-
' Peptide directory again.'
|
171
|
-
print '> '
|
172
|
-
inp = $stdin.gets.chomp
|
173
|
-
end
|
174
|
-
signalp_directory = inp
|
175
|
-
puts # a blank line
|
176
|
-
puts "The Signal P directory has been found at '#{signalp_directory}'"
|
177
|
-
FileUtils.ln_s "#{signalp_directory}", "#{Dir.home}/SignalPeptide",
|
178
|
-
force: true
|
179
|
-
puts # a blank line
|
180
|
-
end
|
62
|
+
def guess_sequence_type(seq)
|
63
|
+
# removing non-letter and ambiguous characters
|
64
|
+
cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
|
65
|
+
return nil if cleaned_sequence.length < 10 # conservative
|
66
|
+
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
67
|
+
type == Bio::Sequence::NA ? :genetic : :protein
|
181
68
|
end
|
182
|
-
signalp_directory
|
183
|
-
end
|
184
69
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
return true
|
192
|
-
else
|
193
|
-
return false
|
70
|
+
def check_num_threads(num_threads)
|
71
|
+
num_threads = Integer(num_threads)
|
72
|
+
unless num_threads > 0
|
73
|
+
$stderr.puts 'Number of threads can not be lower than 0'
|
74
|
+
$stderr.puts 'Changing number of threads to 1'
|
75
|
+
num_threads = 1
|
194
76
|
end
|
77
|
+
return num_threads unless num_threads > 256
|
78
|
+
$stderr.puts "Number of threads set at #{num_threads} is" \
|
79
|
+
' unusually high.'
|
195
80
|
end
|
196
|
-
end
|
197
81
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
File.open('signalp_out.txt', 'r') do |file_stream|
|
202
|
-
secondline = file_stream.readlines[1]
|
203
|
-
row = secondline.gsub(/\s+/m, ' ').chomp.split(' ')
|
204
|
-
if row[1] != 'name' && row[4] != 'Ymax' && row[5] != 'pos' &&
|
205
|
-
row[9] != 'D'
|
206
|
-
return true
|
207
|
-
else
|
208
|
-
return false
|
209
|
-
end
|
82
|
+
def assert_binaries(desc, bin)
|
83
|
+
return if command?(bin.to_s)
|
84
|
+
$stderr.puts "NpSearch is unable to use the #{desc} at #{bin}"
|
210
85
|
end
|
211
|
-
end
|
212
86
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
# produced by the Signal P Script are conserved (via 'sp_column'
|
217
|
-
# Method).
|
218
|
-
def sp_results(signalp_output_file)
|
219
|
-
return if sp_version(signalp_output_file)
|
220
|
-
# i.e. if Signal P is the wrong version
|
221
|
-
if sp_column(signalp_output_file) # If wrong version but correct columns
|
222
|
-
puts # a blank line
|
223
|
-
puts 'Warning: The wrong version of signalp has been linked.' \
|
224
|
-
' However, the signal peptide output file still seems to' \
|
225
|
-
' be in the right format.'
|
226
|
-
else
|
227
|
-
puts # a blank line
|
228
|
-
puts 'Warning: The wrong version of the signal p has been linked' \
|
229
|
-
' and the signal peptide output is in an unrecognised format.'
|
230
|
-
puts 'Continuing may give you meaningless results.'
|
231
|
-
end
|
232
|
-
puts # a blank line
|
233
|
-
puts 'Do you still want to continue? [y/n]'
|
234
|
-
print '> '
|
235
|
-
inp = $stdin.gets.chomp
|
236
|
-
until inp.downcase == 'n' || inp.downcase == 'y'
|
237
|
-
puts # a blank line
|
238
|
-
puts "The input: '#{inp}' is not recognised - 'y' or 'n' are the" \
|
239
|
-
' only recognisable inputs.'
|
240
|
-
puts 'Please try again.'
|
87
|
+
# Return `true` if the given command exists and is executable.
|
88
|
+
def command?(command)
|
89
|
+
system("which #{command} > /dev/null 2>&1")
|
241
90
|
end
|
242
|
-
if inp.downcase == 'y'
|
243
|
-
puts 'Continuing.'
|
244
|
-
elsif inp.downcase == 'n'
|
245
|
-
fail IOError('Critical Error: NpSearch only supports SignalP 4.1' \
|
246
|
-
' (downloadable form CBS) Please ensure the version' \
|
247
|
-
' of the signal p script is downloaded.')
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
# Guesses the type of the data in the supplied motif. It ignores all
|
252
|
-
# non-word characters (e.g. '|' that is used for regex). It has a 90%
|
253
|
-
# threshold.
|
254
|
-
def motif_type(motif)
|
255
|
-
motif_seq = Bio::Sequence.new(motif.gsub(/\W/, ''))
|
256
|
-
type = motif_seq.guess(0.9)
|
257
|
-
return unless type.to_s != 'Bio::Sequence::AA'
|
258
|
-
fail IOError('Critical Error: There seems to be an error in' \
|
259
|
-
' processing the motif. Please ensure that the motif' \
|
260
|
-
' contains amino acid residues that you wish to search' \
|
261
|
-
' for.')
|
262
91
|
end
|
263
92
|
end
|
264
93
|
end
|
data/lib/npsearch/output.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'slim'
|
2
2
|
|
3
|
+
# Top level module / namespace.
|
3
4
|
module NpSearch
|
4
5
|
# Class that generates the output
|
5
6
|
class Output
|
@@ -8,17 +9,17 @@ module NpSearch
|
|
8
9
|
templates_path = File.expand_path(File.join(__FILE__, '../../../',
|
9
10
|
'templates/contents.slim'))
|
10
11
|
contents_temp = File.read(templates_path)
|
11
|
-
|
12
|
-
File.open("#{input_file}.
|
12
|
+
h_content = Slim::Template.new { contents_temp }.render(NpSearch)
|
13
|
+
File.open("#{input_file}.npsearch.html", 'w') { |f| f.puts h_content }
|
13
14
|
end
|
14
15
|
|
15
16
|
def to_fasta(input_file, sorted_sequences, input_type)
|
16
|
-
File.open("#{input_file}.
|
17
|
+
File.open("#{input_file}.npsearch.fa", 'w') do |f|
|
17
18
|
sorted_sequences.each do |s|
|
18
19
|
if input_type == :protein
|
19
|
-
f.puts ">#{s.
|
20
|
+
f.puts ">#{s.defline}\n#{s.signalp}#{s.seq}"
|
20
21
|
elsif input_type == :nucleotide
|
21
|
-
f.puts ">#{s.
|
22
|
+
f.puts ">#{s.defline}-(frame:#{s.translated_frame})"
|
22
23
|
f.puts "#{s.signalp}#{s.seq}"
|
23
24
|
end
|
24
25
|
end
|
data/lib/npsearch/pool.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
# From http://burgestrand.se/code/ruby-thread-pool/
|
3
3
|
#
|
4
|
-
# Copyright
|
4
|
+
# Copyright 2012, Kim Burgestrand kim@burgestrand.se
|
5
5
|
#
|
6
6
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
# of this software and associated documentation files (the "Software"), to deal
|