npsearch 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af22531e55865ab286dd6599917196765d72af12
|
4
|
+
data.tar.gz: 5a3bf459332ff8bc70c3c6e431cae9e09fe0494c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 899fed317d7ceb7a62d52fb2b3e0e24e835f630c058a9dacf05e267019a900c5c2357588e9f1bdd674731eb951a44f16d5898747efb872e6ae1ceaf5efb8acf4
|
7
|
+
data.tar.gz: 0aab6be7e635dd63b2e8e2d4d5eda128f7f39b41977f5b08fb090408dd612ac98d41d3fe60a086ea5a16d2cff56bccbce79b8168c3f2af2e3fff222b1657b908
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -32,8 +32,9 @@ NpSearch orders the results based on the following characteristics:
|
|
32
32
|
### Installation Requirements
|
33
33
|
* Ruby (>= 2.0.0)
|
34
34
|
* SignalP 4.1 (Available from [here](http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?signalp))
|
35
|
-
*
|
36
|
-
*
|
35
|
+
* CD-HIT (Available from [here](http://weizhongli-lab.org/cd-hit/) - Suggested Installation via [Homebrew](http://brew.sh) or [Linuxbrew](http://linuxbrew.sh) - `brew install homebrew/science/cd-hit`)
|
36
|
+
* EMBOSS (Available from [here](http://emboss.sourceforge.net) - Suggested Installation via [Homebrew](http://brew.sh) or [Linuxbrew](http://linuxbrew.sh) - `brew install homebrew/science/emboss`)
|
37
|
+
|
37
38
|
|
38
39
|
## Installation
|
39
40
|
Simply run the following command in the terminal.
|
data/Rakefile
CHANGED
@@ -1,14 +1,23 @@
|
|
1
|
-
require 'bundler/gem_tasks'
|
2
1
|
require 'rake/testtask'
|
3
2
|
|
4
3
|
task default: [:build]
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
|
5
|
+
desc 'Builds and installs'
|
6
|
+
task install: [:build] do
|
7
|
+
require_relative 'lib/npsearch/version'
|
8
|
+
sh "gem install ./npsearch-#{NpSearch::VERSION}.gem"
|
9
|
+
end
|
10
|
+
|
11
|
+
desc 'Runs tests, generates documentation, builds gem (default)'
|
12
|
+
task build: [:test] do
|
13
|
+
sh 'gem build npsearch.gemspec'
|
8
14
|
end
|
9
15
|
|
16
|
+
desc 'Runs tests'
|
10
17
|
task :test do
|
11
18
|
Rake::TestTask.new do |t|
|
12
|
-
t.
|
19
|
+
t.libs.push 'lib'
|
20
|
+
t.test_files = FileList['test/test_*.rb']
|
21
|
+
t.verbose = true
|
13
22
|
end
|
14
23
|
end
|
data/bin/npsearch
CHANGED
@@ -1,56 +1,53 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'optparse'
|
3
|
-
|
4
|
-
require '
|
5
|
-
require 'npsearch/arg_validator'
|
6
|
-
require 'npsearch/version'
|
3
|
+
require 'English'
|
4
|
+
require 'tempfile'
|
7
5
|
|
8
6
|
opt = {}
|
9
7
|
optparse = OptionParser.new do |opts|
|
10
8
|
opts.banner = <<Banner
|
9
|
+
* Description: A tool to identify novel neuropeptides.
|
11
10
|
|
12
|
-
* Usage: npsearch [Options]
|
13
|
-
|
14
|
-
* Mandatory Options:
|
11
|
+
* Usage: npsearch [Options] [Input File]
|
15
12
|
|
13
|
+
* Options
|
16
14
|
Banner
|
17
15
|
|
18
|
-
opt[:
|
19
|
-
opts.on('-
|
20
|
-
'Path to the input fasta file') do |f|
|
21
|
-
opt[:input_file] = f
|
22
|
-
end
|
23
|
-
|
24
|
-
opts.separator ''
|
25
|
-
opts.separator '* Optional Options:'
|
26
|
-
|
27
|
-
opt[:signalp_path] = File.join(ENV['HOME'], 'signalp/signalp')
|
28
|
-
opts.on('-s', '--signalp_path', String,
|
16
|
+
opt[:signalp_path] = 'signalp'
|
17
|
+
opts.on('-s', '--signalp_path path_to_signalp',
|
29
18
|
'The full path to the signalp script. This can be downloaded from',
|
30
19
|
' CBS. See https://www.github.com/wurmlab/NpSearch for more',
|
31
20
|
' information') do |p|
|
32
21
|
opt[:signalp_path] = p
|
33
22
|
end
|
34
23
|
|
35
|
-
opt[:
|
36
|
-
|
37
|
-
|
38
|
-
'
|
39
|
-
'
|
40
|
-
|
24
|
+
opt[:temp_dir] = File.join(Dir.pwd, '.temp',
|
25
|
+
Dir::Tmpname.make_tmpname('', nil))
|
26
|
+
opts.on('-d', '--temp_dir path_to_temp_dir',
|
27
|
+
'The full path to the temp dir. NpSearch will create the folder and',
|
28
|
+
' then delete the folder once it has finished using them.',
|
29
|
+
' Default: Hidden folder in the current working dirctory') do |p|
|
30
|
+
opt[:temp_dir] = p
|
41
31
|
end
|
42
32
|
|
43
33
|
opt[:num_threads] = 1
|
44
|
-
opts.on('-n', '--num_threads', Integer,
|
34
|
+
opts.on('-n', '--num_threads num_of_threads', Integer,
|
45
35
|
'The number of threads to use when analysing the input file') do |n|
|
46
36
|
opt[:num_threads] = n
|
47
37
|
end
|
48
38
|
|
49
|
-
opt[:
|
50
|
-
opts.on('-m', '--
|
39
|
+
opt[:min_orf_length] = 30
|
40
|
+
opts.on('-m', '--min_orf_length N', Integer,
|
51
41
|
'The minimum length of a potential neuropeptide precursor.',
|
52
42
|
' Default: 30') do |n|
|
53
|
-
opt[:
|
43
|
+
opt[:min_orf_length] = n
|
44
|
+
end
|
45
|
+
|
46
|
+
opt[:max_seq_length] = 600
|
47
|
+
opts.on('-m', '--max_seq_length N', Integer,
|
48
|
+
'The maximum length of a potential neuropeptide precursor.',
|
49
|
+
' Default: 600') do |n|
|
50
|
+
opt[:max_seq_length] = n
|
54
51
|
end
|
55
52
|
|
56
53
|
opts.on('-h', '--help', 'Display this screen') do
|
@@ -59,16 +56,31 @@ Banner
|
|
59
56
|
end
|
60
57
|
|
61
58
|
opts.on('-v', '--version', 'Shows version') do
|
59
|
+
require 'npsearch/version'
|
62
60
|
puts NpSearch::VERSION
|
63
61
|
exit
|
64
62
|
end
|
65
63
|
end
|
66
|
-
|
64
|
+
begin
|
65
|
+
optparse.parse!
|
66
|
+
if ARGV.length > 1
|
67
|
+
$stderr.puts "Error: It seems that you have #{ARGV.length} input fasta" \
|
68
|
+
' files. Please ensure that you have a single input fasta' \
|
69
|
+
" file\n"
|
70
|
+
exit 1
|
71
|
+
elsif ARGV.empty?
|
72
|
+
$stderr.puts optparse
|
73
|
+
exit 1
|
74
|
+
end
|
75
|
+
rescue OptionParser::ParseError
|
76
|
+
$stderr.print 'Error: ' + $ERROR_INFO.to_s + "\n"
|
77
|
+
exit 1
|
78
|
+
end
|
67
79
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
80
|
+
opt[:input_file] = ARGV[0]
|
81
|
+
|
82
|
+
require 'npsearch'
|
83
|
+
require 'npsearch/arg_validator'
|
72
84
|
|
73
85
|
NpSearch.init(opt)
|
74
86
|
NpSearch.run
|
@@ -1,264 +1,93 @@
|
|
1
|
+
require 'bio'
|
2
|
+
# Top level module / namespace.
|
1
3
|
module NpSearch
|
2
|
-
class
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
# A class that validates the command line opts
|
5
|
+
class ArgumentsValidators
|
6
|
+
class << self
|
7
|
+
def run(opt)
|
8
|
+
assert_file_present('input fasta file', opt[:input_file])
|
9
|
+
assert_input_file_not_empty(opt[:input_file])
|
10
|
+
assert_input_file_probably_fasta(opt[:input_file])
|
11
|
+
opt[:type] = assert_input_sequence(opt[:input_file])
|
12
|
+
opt[:num_threads] = check_num_threads(opt[:num_threads])
|
13
|
+
assert_binaries('SignalP 4.1 Script', opt[:signalp_path])
|
14
|
+
opt
|
15
|
+
end
|
10
16
|
|
11
|
-
|
12
|
-
def arg(motif, input, output_dir, orf_min_length, extract_orf,
|
13
|
-
signalp_file, help_banner)
|
14
|
-
comp_arg(input, motif, output_dir, extract_orf, help_banner)
|
15
|
-
input_type = guess_input_type(input)
|
16
|
-
extract_orf_conflict(input_type, extract_orf)
|
17
|
-
input_sp_file_conflict(input_type, signalp_file)
|
18
|
-
orf_min_length(orf_min_length)
|
19
|
-
input_type
|
20
|
-
end
|
17
|
+
private
|
21
18
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
return unless input.nil? || (motif.nil? && extract_orf == false)
|
28
|
-
puts help_banner
|
29
|
-
exit
|
30
|
-
end
|
19
|
+
def assert_file_present(desc, file, exit_code = 1)
|
20
|
+
return if file && File.exist?(File.expand_path(file))
|
21
|
+
$stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
|
22
|
+
exit exit_code
|
23
|
+
end
|
31
24
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
25
|
+
def assert_input_file_not_empty(file)
|
26
|
+
return unless File.zero?(File.expand_path(file))
|
27
|
+
$stderr.puts "*** Error: The input_file (#{file})" \
|
28
|
+
' seems to be empty.'
|
29
|
+
exit 1
|
30
|
+
end
|
37
31
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
input_file_format(input_file)
|
43
|
-
sequences = []
|
44
|
-
File.open(input_file, 'r') do |file_stream|
|
45
|
-
file_stream.readlines[0..100].each do |line|
|
46
|
-
sequences << line.to_s unless line.match(/^>/)
|
32
|
+
def assert_input_file_probably_fasta(file)
|
33
|
+
File.open(file, 'r') do |f|
|
34
|
+
fasta = (f.readline[0] == '>') ? true : false
|
35
|
+
return fasta if fasta
|
47
36
|
end
|
37
|
+
$stderr.puts "*** Error: The input_file (#{file})" \
|
38
|
+
' does not seems to be a fasta file.'
|
39
|
+
exit 1
|
48
40
|
end
|
49
|
-
type = Bio::Sequence.new(sequences).guess(0.8)
|
50
|
-
if type == Bio::Sequence::NA
|
51
|
-
input_type = 'genetic'
|
52
|
-
elsif type == Bio::Sequence::AA
|
53
|
-
input_type = 'protein'
|
54
|
-
end
|
55
|
-
input_type
|
56
|
-
end
|
57
41
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
if File.zero?(input_file)
|
66
|
-
fail ArgumentError("Critical Error: The input file '#{input_file}'" \
|
67
|
-
' is empty.')
|
42
|
+
def assert_input_sequence(file)
|
43
|
+
type = type_of_sequences(file)
|
44
|
+
return type unless type.nil?
|
45
|
+
$stderr.puts '*** Error: The input files seems to contain a mixture of'
|
46
|
+
$stderr.puts ' both protein and nucleotide data.'
|
47
|
+
$stderr.puts ' Please correct this and try again.'
|
48
|
+
exit 1
|
68
49
|
end
|
69
|
-
unless File.probably_fasta?(input_file)
|
70
|
-
fail ArgumentError("Critical Error: The input file '#{input_file}'" \
|
71
|
-
' does not seem to be in fasta format. Only' \
|
72
|
-
' input files in fasta format are supported.')
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
# Ensures that the extract_orf option is only used with genetic data.
|
77
|
-
def extract_orf_conflict(input_type, extract_orf)
|
78
|
-
return unless input_type == 'protein' && extract_orf == true
|
79
|
-
fail ArgumentError('Usage Error: Conflicting arguments detected:' \
|
80
|
-
' Protein data detected within the input file,' \
|
81
|
-
' when using the Extract_ORF option (option' \
|
82
|
-
' "-e"). This option is only available when' \
|
83
|
-
' input file contains genetic data.')
|
84
|
-
end
|
85
|
-
|
86
|
-
# Ensures that the protein data (or open reading frames) are supplied as
|
87
|
-
# the input file when the signal p output file is passed.
|
88
|
-
def input_sp_file_conflict(input_type, signalp_file)
|
89
|
-
return unless input_type == 'genetic' && !signalp_file.nil?
|
90
|
-
fail ArgumentError('Usage Error: Conflicting arguments detected' \
|
91
|
-
': Genetic data detected within the input file' \
|
92
|
-
' when using the Signal P Input Option (Option' \
|
93
|
-
' "-s"). The Signal P input Option requires the' \
|
94
|
-
' input of two files: the Signal P Script Result' \
|
95
|
-
' files (at the "-s" option) and the protein' \
|
96
|
-
' data file used to run the Signal P Script.')
|
97
|
-
end
|
98
50
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
class Validators
|
109
|
-
# Checks for the presence of the output directory; if not found, it asks
|
110
|
-
# the user whether they want to create the output directory.
|
111
|
-
def output_dir(output_dir)
|
112
|
-
unless File.directory? output_dir # If output_dir doesn't exist
|
113
|
-
fail IOError, "\n\nThe output directory deoes not exist\n\n"
|
114
|
-
end
|
115
|
-
rescue IOError
|
116
|
-
puts # a blank line
|
117
|
-
puts 'The output directory does not exist.'
|
118
|
-
puts # a blank line
|
119
|
-
puts "The directory '#{output_dir}' will be created in this location."
|
120
|
-
puts 'Do you to continue? [y/n]'
|
121
|
-
print '> '
|
122
|
-
inp = $stdin.gets.chomp
|
123
|
-
until inp.downcase == 'n' || inp.downcase == 'y' || inp == ''
|
124
|
-
puts # a blank line
|
125
|
-
puts "The input: '#{inp}' is not recognised - 'y' or 'n' are the" \
|
126
|
-
' only recognisable inputs.'
|
127
|
-
puts 'Please try again.'
|
128
|
-
puts "The directory '#{output_dir}' will be created in this" \
|
129
|
-
' location.'
|
130
|
-
puts 'Do you to continue? [y/n]'
|
131
|
-
print '> '
|
132
|
-
inp = $stdin.gets.chomp
|
51
|
+
def type_of_sequences(file)
|
52
|
+
fasta_content = IO.binread(file)
|
53
|
+
# the first sequence does not need to have a fasta definition line
|
54
|
+
sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
|
55
|
+
# get all sequence types
|
56
|
+
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
57
|
+
.uniq.compact
|
58
|
+
return nil if sequence_types.empty?
|
59
|
+
sequence_types.first if sequence_types.length == 1
|
133
60
|
end
|
134
|
-
if inp.downcase == 'y' || inp == ''
|
135
|
-
FileUtils.mkdir_p "#{output_dir}"
|
136
|
-
puts 'Created output directory...'
|
137
|
-
elsif inp.downcase == 'n'
|
138
|
-
raise ArgumentError('Critical Error: An output directory is' \
|
139
|
-
' required; please create an output directory' \
|
140
|
-
' and then try again.')
|
141
|
-
end
|
142
|
-
end
|
143
61
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
else
|
151
|
-
begin
|
152
|
-
fail IOError('The Signal P Script directory cannot be found at' \
|
153
|
-
" the following location: '#{signalp_dir}/'.")
|
154
|
-
rescue IOError
|
155
|
-
puts # a blank line
|
156
|
-
puts 'Error: The Signal P Script directory cannot be found at the' \
|
157
|
-
" following location: '#{signalp_dir}/'."
|
158
|
-
puts # a blank line
|
159
|
-
puts 'Please enter the full path or a relative path to the Signal' \
|
160
|
-
' P Script directory (i.e. to the folder containing the' \
|
161
|
-
' Signal P script). Refer to the online tutorial for more help'
|
162
|
-
print '> '
|
163
|
-
inp = $stdin.gets.chomp
|
164
|
-
until (File.exist? "#{signalp_dir}/signalp") ||
|
165
|
-
(File.exist? "#{inp}/signalp")
|
166
|
-
puts # a blank line
|
167
|
-
puts 'The Signal P directory cannot be found at the following' \
|
168
|
-
" location: '#{inp}'"
|
169
|
-
puts 'Please enter the full path or a relative path to the Signal' \
|
170
|
-
' Peptide directory again.'
|
171
|
-
print '> '
|
172
|
-
inp = $stdin.gets.chomp
|
173
|
-
end
|
174
|
-
signalp_directory = inp
|
175
|
-
puts # a blank line
|
176
|
-
puts "The Signal P directory has been found at '#{signalp_directory}'"
|
177
|
-
FileUtils.ln_s "#{signalp_directory}", "#{Dir.home}/SignalPeptide",
|
178
|
-
force: true
|
179
|
-
puts # a blank line
|
180
|
-
end
|
62
|
+
def guess_sequence_type(seq)
|
63
|
+
# removing non-letter and ambiguous characters
|
64
|
+
cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
|
65
|
+
return nil if cleaned_sequence.length < 10 # conservative
|
66
|
+
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
67
|
+
type == Bio::Sequence::NA ? :genetic : :protein
|
181
68
|
end
|
182
|
-
signalp_directory
|
183
|
-
end
|
184
69
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
return true
|
192
|
-
else
|
193
|
-
return false
|
70
|
+
def check_num_threads(num_threads)
|
71
|
+
num_threads = Integer(num_threads)
|
72
|
+
unless num_threads > 0
|
73
|
+
$stderr.puts 'Number of threads can not be lower than 0'
|
74
|
+
$stderr.puts 'Changing number of threads to 1'
|
75
|
+
num_threads = 1
|
194
76
|
end
|
77
|
+
return num_threads unless num_threads > 256
|
78
|
+
$stderr.puts "Number of threads set at #{num_threads} is" \
|
79
|
+
' unusually high.'
|
195
80
|
end
|
196
|
-
end
|
197
81
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
File.open('signalp_out.txt', 'r') do |file_stream|
|
202
|
-
secondline = file_stream.readlines[1]
|
203
|
-
row = secondline.gsub(/\s+/m, ' ').chomp.split(' ')
|
204
|
-
if row[1] != 'name' && row[4] != 'Ymax' && row[5] != 'pos' &&
|
205
|
-
row[9] != 'D'
|
206
|
-
return true
|
207
|
-
else
|
208
|
-
return false
|
209
|
-
end
|
82
|
+
def assert_binaries(desc, bin)
|
83
|
+
return if command?(bin.to_s)
|
84
|
+
$stderr.puts "NpSearch is unable to use the #{desc} at #{bin}"
|
210
85
|
end
|
211
|
-
end
|
212
86
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
# produced by the Signal P Script are conserved (via 'sp_column'
|
217
|
-
# Method).
|
218
|
-
def sp_results(signalp_output_file)
|
219
|
-
return if sp_version(signalp_output_file)
|
220
|
-
# i.e. if Signal P is the wrong version
|
221
|
-
if sp_column(signalp_output_file) # If wrong version but correct columns
|
222
|
-
puts # a blank line
|
223
|
-
puts 'Warning: The wrong version of signalp has been linked.' \
|
224
|
-
' However, the signal peptide output file still seems to' \
|
225
|
-
' be in the right format.'
|
226
|
-
else
|
227
|
-
puts # a blank line
|
228
|
-
puts 'Warning: The wrong version of the signal p has been linked' \
|
229
|
-
' and the signal peptide output is in an unrecognised format.'
|
230
|
-
puts 'Continuing may give you meaningless results.'
|
231
|
-
end
|
232
|
-
puts # a blank line
|
233
|
-
puts 'Do you still want to continue? [y/n]'
|
234
|
-
print '> '
|
235
|
-
inp = $stdin.gets.chomp
|
236
|
-
until inp.downcase == 'n' || inp.downcase == 'y'
|
237
|
-
puts # a blank line
|
238
|
-
puts "The input: '#{inp}' is not recognised - 'y' or 'n' are the" \
|
239
|
-
' only recognisable inputs.'
|
240
|
-
puts 'Please try again.'
|
87
|
+
# Return `true` if the given command exists and is executable.
|
88
|
+
def command?(command)
|
89
|
+
system("which #{command} > /dev/null 2>&1")
|
241
90
|
end
|
242
|
-
if inp.downcase == 'y'
|
243
|
-
puts 'Continuing.'
|
244
|
-
elsif inp.downcase == 'n'
|
245
|
-
fail IOError('Critical Error: NpSearch only supports SignalP 4.1' \
|
246
|
-
' (downloadable form CBS) Please ensure the version' \
|
247
|
-
' of the signal p script is downloaded.')
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
# Guesses the type of the data in the supplied motif. It ignores all
|
252
|
-
# non-word characters (e.g. '|' that is used for regex). It has a 90%
|
253
|
-
# threshold.
|
254
|
-
def motif_type(motif)
|
255
|
-
motif_seq = Bio::Sequence.new(motif.gsub(/\W/, ''))
|
256
|
-
type = motif_seq.guess(0.9)
|
257
|
-
return unless type.to_s != 'Bio::Sequence::AA'
|
258
|
-
fail IOError('Critical Error: There seems to be an error in' \
|
259
|
-
' processing the motif. Please ensure that the motif' \
|
260
|
-
' contains amino acid residues that you wish to search' \
|
261
|
-
' for.')
|
262
91
|
end
|
263
92
|
end
|
264
93
|
end
|
data/lib/npsearch/output.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'slim'
|
2
2
|
|
3
|
+
# Top level module / namespace.
|
3
4
|
module NpSearch
|
4
5
|
# Class that generates the output
|
5
6
|
class Output
|
@@ -8,17 +9,17 @@ module NpSearch
|
|
8
9
|
templates_path = File.expand_path(File.join(__FILE__, '../../../',
|
9
10
|
'templates/contents.slim'))
|
10
11
|
contents_temp = File.read(templates_path)
|
11
|
-
|
12
|
-
File.open("#{input_file}.
|
12
|
+
h_content = Slim::Template.new { contents_temp }.render(NpSearch)
|
13
|
+
File.open("#{input_file}.npsearch.html", 'w') { |f| f.puts h_content }
|
13
14
|
end
|
14
15
|
|
15
16
|
def to_fasta(input_file, sorted_sequences, input_type)
|
16
|
-
File.open("#{input_file}.
|
17
|
+
File.open("#{input_file}.npsearch.fa", 'w') do |f|
|
17
18
|
sorted_sequences.each do |s|
|
18
19
|
if input_type == :protein
|
19
|
-
f.puts ">#{s.
|
20
|
+
f.puts ">#{s.defline}\n#{s.signalp}#{s.seq}"
|
20
21
|
elsif input_type == :nucleotide
|
21
|
-
f.puts ">#{s.
|
22
|
+
f.puts ">#{s.defline}-(frame:#{s.translated_frame})"
|
22
23
|
f.puts "#{s.signalp}#{s.seq}"
|
23
24
|
end
|
24
25
|
end
|
data/lib/npsearch/pool.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
# From http://burgestrand.se/code/ruby-thread-pool/
|
3
3
|
#
|
4
|
-
# Copyright
|
4
|
+
# Copyright 2012, Kim Burgestrand kim@burgestrand.se
|
5
5
|
#
|
6
6
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
# of this software and associated documentation files (the "Software"), to deal
|