npsearch 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
@@ -1,108 +1,110 @@
|
|
1
|
-
|
2
|
-
require 'tempfile'
|
3
|
-
|
1
|
+
# Top level module / namespace.
|
4
2
|
module NpSearch
|
5
3
|
# A class to score the Sequences
|
6
4
|
class ScoreSequence
|
7
5
|
class << self
|
8
|
-
DI_CLV
|
9
|
-
MONO_NP_CLV_2 = '[KR]..R'
|
10
|
-
MONO_NP_CLV_4 = '[KR]....R'
|
11
|
-
MONO_NP_CLV_6 = '[KR]......R'
|
6
|
+
DI_CLV = 'KR|RR|KK'.freeze
|
7
|
+
MONO_NP_CLV_2 = '[KR]..R'.freeze
|
8
|
+
MONO_NP_CLV_4 = '[KR]....R'.freeze
|
9
|
+
MONO_NP_CLV_6 = '[KR]......R'.freeze
|
12
10
|
NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
|
13
|
-
"(#{MONO_NP_CLV_6})"
|
11
|
+
"(#{MONO_NP_CLV_6})".freeze
|
14
12
|
|
15
|
-
def run(sequence)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
acidic_spacers
|
13
|
+
def run(sequence, opt)
|
14
|
+
split_into_potential_neuropeptides(sequence)
|
15
|
+
count_np_cleavage_sites(sequence)
|
16
|
+
count_c_terminal_glycines(sequence)
|
17
|
+
np_similarity(sequence, opt[:temp_dir])
|
18
|
+
acidic_spacers(sequence)
|
22
19
|
end
|
23
20
|
|
24
21
|
private
|
25
22
|
|
26
|
-
def
|
23
|
+
def split_into_potential_neuropeptides(sequence)
|
27
24
|
potential_nps = []
|
28
|
-
results =
|
25
|
+
results = sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
|
29
26
|
headers = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
|
30
27
|
di_clv_end mono_2_clv_end mono_4_clv_end mono_6_clv_end)
|
31
28
|
results.each { |e| potential_nps << Hash[headers.map(&:to_sym).zip(e)] }
|
32
|
-
|
29
|
+
sequence.potential_cleaved_nps = potential_nps
|
33
30
|
end
|
34
31
|
|
35
|
-
def count_np_cleavage_sites
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
32
|
+
def count_np_cleavage_sites(sequence)
|
33
|
+
return if sequence.potential_cleaved_nps.empty?
|
34
|
+
sequence.potential_cleaved_nps.each do |e|
|
35
|
+
count_dibasic_np_clv(sequence, e[:di_clv_end])
|
36
|
+
count_mono_basic_np_clv(sequence, e[:mono_2_clv_end],
|
37
|
+
e[:mono_4_clv_end], e[:mono_6_clv_end])
|
40
38
|
end
|
41
39
|
end
|
42
40
|
|
43
|
-
def count_dibasic_np_clv(dibasic_clv)
|
41
|
+
def count_dibasic_np_clv(sequence, dibasic_clv)
|
44
42
|
case dibasic_clv
|
45
43
|
when 'KR'
|
46
|
-
|
44
|
+
sequence.score += 0.09
|
47
45
|
when 'RR', 'KK'
|
48
|
-
|
46
|
+
sequence.score += 0.05
|
49
47
|
end
|
50
48
|
end
|
51
49
|
|
52
|
-
def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
|
50
|
+
def count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv)
|
53
51
|
return if mono_2_clv.nil? && mono_4_clv.nil? && mono_6_clv.nil?
|
54
|
-
|
52
|
+
sequence.score += 0.02
|
55
53
|
end
|
56
54
|
|
57
55
|
# Counts the number of C-terminal glycines
|
58
|
-
def count_c_terminal_glycines
|
59
|
-
|
60
|
-
|
61
|
-
|
56
|
+
def count_c_terminal_glycines(sequence)
|
57
|
+
return if sequence.potential_cleaved_nps.empty?
|
58
|
+
sequence.potential_cleaved_nps.each do |e|
|
59
|
+
if e[:np] =~ /FG$/ && e[:di_clv_end] == 'KR'
|
60
|
+
sequence.score += 0.40
|
61
|
+
elsif e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
|
62
|
+
sequence.score += 0.25
|
62
63
|
elsif e[:np] =~ /G$|GK$|GR$/
|
63
|
-
|
64
|
+
sequence.score += 0.10
|
64
65
|
end
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
69
|
+
# Adds 0.10 if the acidic spacer is detected.
|
70
|
+
# Acidic Spacer is defined as being less than 25% of the precursor length
|
71
|
+
# (not including the Signalp) && having more than 50% D and E amino acids.
|
72
|
+
def acidic_spacers(sequence)
|
73
|
+
sequence.potential_cleaved_nps.each do |e|
|
74
|
+
next if e[:np].length / sequence.seq.length > 0.25
|
75
|
+
sequence.score += 0.10 if e[:np].count('DE') / e[:np].length > 0.5
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
76
|
-
def np_similarity
|
77
|
-
results
|
78
|
-
results.
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
if
|
83
|
-
|
79
|
+
def np_similarity(sequence, temp_dir, results = nil)
|
80
|
+
results = run_cdhit(sequence, temp_dir) if results.nil?
|
81
|
+
clusters = results.split(/^>Cluster \d+\n/)
|
82
|
+
clusters.each do |c|
|
83
|
+
next if c.nil?
|
84
|
+
no_of_seqs_in_cluster = c.split("\n").length
|
85
|
+
if no_of_seqs_in_cluster > 1
|
86
|
+
sequence.score += (0.15 * no_of_seqs_in_cluster)
|
84
87
|
end
|
85
88
|
end
|
86
89
|
end
|
87
90
|
|
88
|
-
def
|
89
|
-
f = Tempfile.new('
|
90
|
-
fo = Tempfile.new('
|
91
|
-
|
92
|
-
`
|
93
|
-
IO.read(fo.path)
|
94
|
-
ensure
|
95
|
-
f.unlink
|
96
|
-
fo.unlink
|
91
|
+
def run_cdhit(sequence, temp_dir)
|
92
|
+
f = Tempfile.new('clust', temp_dir)
|
93
|
+
fo = Tempfile.new('clust_out', temp_dir)
|
94
|
+
return unless write_potential_peptides_to_tempfile(sequence, f)
|
95
|
+
`cd-hit -c 0.5 -n 3 -l 4 -i #{f.path} -o #{fo.path}`
|
96
|
+
IO.read("#{fo.path}.clstr")
|
97
97
|
end
|
98
98
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
-
|
99
|
+
def write_potential_peptides_to_tempfile(sequence, tempfile)
|
100
|
+
return false if sequence.potential_cleaved_nps.empty?
|
101
|
+
sequences = ''
|
102
|
+
sequence.potential_cleaved_nps.each_with_index do |e, i|
|
103
|
+
sequences += ">seq#{i}\n#{e[:np]}\n"
|
103
104
|
end
|
104
|
-
tempfile.write(
|
105
|
+
tempfile.write(sequences)
|
105
106
|
tempfile.close
|
107
|
+
true
|
106
108
|
end
|
107
109
|
end
|
108
110
|
end
|
data/lib/npsearch/sequence.rb
CHANGED
@@ -1,23 +1,26 @@
|
|
1
|
+
# Top level module / namespace.
|
1
2
|
module NpSearch
|
3
|
+
# Adapted from GeneValidator's Query Class..
|
2
4
|
# A class to hold sequence data
|
3
5
|
class Sequence
|
4
|
-
DI_NP_CLV
|
5
|
-
MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'
|
6
|
+
DI_NP_CLV = 'KR|KK|RR'.freeze
|
7
|
+
MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'.freeze
|
6
8
|
|
7
9
|
attr_reader :id
|
10
|
+
attr_reader :defline
|
8
11
|
attr_reader :signalp
|
9
12
|
attr_reader :seq
|
10
13
|
attr_reader :html_seq
|
11
|
-
attr_reader :signalp_output
|
12
14
|
attr_reader :translated_frame
|
13
15
|
attr_accessor :score
|
14
16
|
attr_accessor :potential_cleaved_nps
|
15
17
|
|
16
|
-
def initialize(
|
17
|
-
@id =
|
18
|
-
|
19
|
-
|
20
|
-
@
|
18
|
+
def initialize(entry, sp, frame = nil)
|
19
|
+
@id = entry.entry_id
|
20
|
+
@defline = entry.definition
|
21
|
+
sp_cleavage_site_idx = sp[:ymax_pos].to_i - 1
|
22
|
+
@signalp = sp[:orf][0..(sp_cleavage_site_idx - 1)]
|
23
|
+
@seq = sp[:orf][sp_cleavage_site_idx..-1]
|
21
24
|
@html_seq = format_seq_for_html
|
22
25
|
@translated_frame = frame
|
23
26
|
@score = 0
|
@@ -27,7 +30,7 @@ module NpSearch
|
|
27
30
|
def format_seq_for_html
|
28
31
|
seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
|
29
32
|
seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
|
30
|
-
seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target R
|
33
|
+
seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target 'R'
|
31
34
|
seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
|
32
35
|
seq.gsub!('G<span class=np_clv>',
|
33
36
|
'<span class=glycine>G</span><span class=np_clv>')
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'forwardable'
|
2
|
-
require 'tempfile'
|
3
2
|
|
3
|
+
# Top level module / namespace.
|
4
4
|
module NpSearch
|
5
5
|
# A class to hold sequence data
|
6
6
|
class Signalp
|
@@ -10,15 +10,34 @@ module NpSearch
|
|
10
10
|
|
11
11
|
def analyse_sequence(seq)
|
12
12
|
sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
|
13
|
-
sp dmaxcut networks)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
sp dmaxcut networks orf)
|
14
|
+
data = setup_analysis(seq)
|
15
|
+
orf_results = []
|
16
|
+
s = `echo "#{data[:fasta]}\n" | #{opt[:signalp_path]} -t euk \
|
17
|
+
-f short -U 0.34 -u 0.34`
|
18
|
+
sp_results = s.split("\n").delete_if { |l| l[0] == '#' }
|
19
|
+
sp_results.each_with_index do |line, idx|
|
20
|
+
line = line + ' ' + data[:seq][idx].to_s
|
21
|
+
orf_results << Hash[sp_headers.map(&:to_sym).zip(line.split)]
|
22
|
+
end
|
23
|
+
orf_results.sort_by { |h| h[:d] }.reverse[0]
|
24
|
+
end
|
25
|
+
|
26
|
+
def setup_analysis(seq)
|
27
|
+
if opt[:type] == :protein
|
28
|
+
data = { seq: [seq], fasta: ">seq\n#{seq}" }
|
29
|
+
else
|
30
|
+
orfs = seq.scan(/(?=(M\w+))./).flatten
|
31
|
+
orfs.unshift(seq)
|
32
|
+
data = { seq: orfs, fasta: create_orf_fasta(orfs) }
|
33
|
+
end
|
34
|
+
data
|
35
|
+
end
|
36
|
+
|
37
|
+
def create_orf_fasta(m_orf)
|
38
|
+
fasta = ''
|
39
|
+
m_orf.each_with_index { |seq, idx| fasta << ">#{idx}\n#{seq}\n" }
|
40
|
+
fasta
|
22
41
|
end
|
23
42
|
end
|
24
43
|
end
|
data/lib/npsearch/version.rb
CHANGED
data/lib/npsearch.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'bio'
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
|
-
|
4
|
+
require 'npsearch/arg_validator'
|
5
5
|
require 'npsearch/output'
|
6
6
|
require 'npsearch/pool'
|
7
7
|
require 'npsearch/scoresequence'
|
@@ -11,33 +11,39 @@ require 'npsearch/signalp'
|
|
11
11
|
# Top level module / namespace.
|
12
12
|
module NpSearch
|
13
13
|
class <<self
|
14
|
-
MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
|
15
|
-
|
16
14
|
attr_accessor :opt
|
17
15
|
attr_accessor :sequences
|
18
16
|
attr_reader :sorted_sequences
|
19
17
|
|
20
18
|
def init(opt)
|
21
|
-
|
22
|
-
@
|
23
|
-
@sequences = []
|
19
|
+
@opt = ArgumentsValidators.run(opt)
|
20
|
+
@sequences = []
|
24
21
|
@sorted_sequences = nil
|
25
|
-
@opt[:
|
26
|
-
|
22
|
+
@pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
|
23
|
+
FileUtils.mkdir_p(@opt[:temp_dir])
|
24
|
+
extract_orf if @opt[:type] == :genetic
|
27
25
|
end
|
28
26
|
|
29
27
|
def run
|
30
|
-
|
28
|
+
input_file = @opt[:type] == :protein ? @opt[:input_file] : @opt[:orf]
|
29
|
+
iterate_input_file(input_file)
|
31
30
|
@sorted_sequences = @sequences.sort_by(&:score).reverse
|
32
31
|
Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
|
33
32
|
Output.to_html(@opt[:input_file])
|
33
|
+
remove_temp_dir
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
# Uses getorf from EMBOSS package to extract all ORF
|
39
|
+
def extract_orf(input = @opt[:input_file], minsize = 90)
|
40
|
+
@opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
|
41
|
+
system "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
|
42
|
+
" -minsize #{minsize} >/dev/null 2>&1"
|
43
|
+
end
|
44
|
+
|
45
|
+
def iterate_input_file(input_file)
|
46
|
+
Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
|
41
47
|
if @opt[:num_threads] > 1
|
42
48
|
@pool.schedule(entry) { |e| initialise_seqs(e) }
|
43
49
|
else
|
@@ -48,49 +54,18 @@ module NpSearch
|
|
48
54
|
end
|
49
55
|
|
50
56
|
def initialise_seqs(entry)
|
51
|
-
if @opt[:
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
def initialise_protein_seq(id, seq)
|
59
|
-
sp = Signalp.analyse_sequence(seq)
|
60
|
-
return unless sp[:sp] == 'Y'
|
61
|
-
seq = Sequence.new(id, seq, sp)
|
62
|
-
ScoreSequence.run(seq)
|
57
|
+
return if entry.aaseq.length > @opt[:max_seq_length]
|
58
|
+
sp = Signalp.analyse_sequence(entry.aaseq)
|
59
|
+
return if sp[:sp] == 'N'
|
60
|
+
# seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
|
61
|
+
seq = Sequence.new(entry, sp)
|
62
|
+
ScoreSequence.run(seq, @opt)
|
63
63
|
@sequences << seq
|
64
64
|
end
|
65
65
|
|
66
|
-
def
|
67
|
-
(
|
68
|
-
|
69
|
-
orfs = translated_seq.to_s.scan(/(?=(M\w{#{MIN_ORF_SIZE},}))./).flatten
|
70
|
-
initialise_orfs(id, orfs, f)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def initialise_orfs(id, orfs, frame)
|
75
|
-
orfs.each do |orf|
|
76
|
-
sp = Signalp.analyse_sequence(orf)
|
77
|
-
next if sp[:sp] == 'N'
|
78
|
-
seq = Sequence.new(id, orf, sp, frame)
|
79
|
-
ScoreSequence.run(seq)
|
80
|
-
@sequences << seq
|
81
|
-
# The remaining ORF in this frame are simply shorter versions of the
|
82
|
-
# same orf so break loop once signal peptide is found.
|
83
|
-
break if sp[:sp] == 'Y'
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
def guess_sequence_type
|
88
|
-
fasta_content = IO.binread(@opt[:input_file])
|
89
|
-
# removing non-letter and ambiguous characters
|
90
|
-
cleaned_sequence = fasta_content.gsub(/[^A-Z]|[NX]/i, '')
|
91
|
-
return nil if cleaned_sequence.length < 10 # conservative
|
92
|
-
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
93
|
-
(type == Bio::Sequence::NA) ? :nucleotide : :protein
|
66
|
+
def remove_temp_dir
|
67
|
+
return unless File.directory?(@opt[:temp_dir])
|
68
|
+
FileUtils.rm_rf(@opt[:temp_dir])
|
94
69
|
end
|
95
70
|
end
|
96
71
|
end
|
data/npsearch.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
|
|
16
16
|
' For more information: https://github.com/wurmlab/npsearch'
|
17
17
|
s.summary = 'Search for neuropeptides based on the common' \
|
18
18
|
' neuropeptides markers'
|
19
|
-
s.homepage = 'https://github.com/
|
19
|
+
s.homepage = 'https://github.com/wurmlab/npsearch'
|
20
20
|
s.license = 'AGPL'
|
21
21
|
|
22
22
|
s.files = `git ls-files -z`.split("\x0")
|
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
s.add_development_dependency 'bundler', '~> 1.6'
|
29
29
|
s.add_development_dependency 'rake', '~>10.3'
|
30
30
|
s.add_development_dependency 'coveralls'
|
31
|
+
s.add_development_dependency 'minitest', '~> 5.4'
|
31
32
|
|
32
33
|
s.add_dependency 'bio', '~> 1.4'
|
33
34
|
s.add_dependency 'slim', '~> 3.0'
|
data/templates/contents.slim
CHANGED
@@ -29,9 +29,9 @@ html lang="en"
|
|
29
29
|
p.sequence
|
30
30
|
span.id
|
31
31
|
- if @opt[:type] == :protein
|
32
|
-
| >#{seq.
|
32
|
+
| >#{seq.defline}
|
33
33
|
- elsif @opt[:type] == :nucleotide
|
34
|
-
| >#{seq.
|
34
|
+
| >#{seq.defline}-(frame:#{seq.translated_frame})
|
35
35
|
br
|
36
36
|
span.seq== seq.html_seq
|
37
37
|
br
|
@@ -50,5 +50,5 @@ html lang="en"
|
|
50
50
|
a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
|
51
51
|
br
|
52
52
|
| This page was created by
|
53
|
-
a href="https://github.com/wurmlab/
|
53
|
+
a href="https://github.com/wurmlab/npsearch" target="_blank" NpSearch
|
54
54
|
| v#{NpSearch::VERSION}
|
@@ -0,0 +1,167 @@
|
|
1
|
+
>isotig00006 gene=isogroup00003 length=1747 numContigs=6
|
2
|
+
AGTTAAAAGTTGAAAAATTGGTGACCATATTTTGACACTCTAGCATATTTGGGAGCTATA
|
3
|
+
TACTGATTTGGGTTTCACCATGCACAGATGAGGTATATACATAAGTTGAAAGCCTGCAGC
|
4
|
+
TCTATATTAAAGGCATTGAAGACtcGCCcAAaccgtgTGcgcccTCTGAAAAaGTTAACT
|
5
|
+
TTCcGTTgCTTGCAaGTGAAGTTTtcTtCTTGTCGCTACAAAATGCAGACAGTAATGAAA
|
6
|
+
|
7
|
+
>isotig00007 gene=isogroup00003 length=1749 numContigs=5
|
8
|
+
TGTGTGTGTGTGGTGCTTCCccTCTAGGGCTGTAAATTTCAAAGGAACCTTGCGCAAGAA
|
9
|
+
CAGtAGCTTGCGaCGTTTTTCAAaaCCAGAGGTTCTGAACTGAACTGTACTGACTACTGT
|
10
|
+
AGGGtacTTAAaGGCATTGAAGACTCGCCcAAaCCatgTGCCGCGctttGAAAAAGTTAA
|
11
|
+
CTTTCCGTTGCTTGCAAATGAcGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgA
|
12
|
+
AACGTGATACcTtGTtATCTTTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTG
|
13
|
+
TGTTGTGGgTATTGACcGTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGAC
|
14
|
+
GGTAGCAAGCTGTGTGTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACA
|
15
|
+
CCtCAtTcGAAACTAGGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACA
|
16
|
+
cccttttaaggagaagtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGG
|
17
|
+
TAGCATGCAACTTAAAAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATT
|
18
|
+
ACAAGGttAAtCtacTGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCT
|
19
|
+
TtCAaTAaTTATACAAACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTC
|
20
|
+
AAGTTTATCAATGTAATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCT
|
21
|
+
GTGGATAAGACTGCCAGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCC
|
22
|
+
TTCACCTCCTTGCAGATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATT
|
23
|
+
AAAATCTGGCTTCCTcCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGT
|
24
|
+
GTGAAACCACTGAAAGATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGA
|
25
|
+
CAGAGGCCACACTGATACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATT
|
26
|
+
TTGCTTCTGCGATGCAGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAa
|
27
|
+
CTGCCtGGTTtAtAGAGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTG
|
28
|
+
TCATGGCCTTGAGCAAGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTA
|
29
|
+
GGTACTGTcAAATCCACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAA
|
30
|
+
AGAGTACAATGAGGGTTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGC
|
31
|
+
AGCTGaaaaGATATTCAGAAaTTGTTATATATGAGTGTGTTTGTATGCATGCAtATGtGT
|
32
|
+
GATTTtCTtGCTTTACAGAACAGCTCCaTTTTGATAAGCTAtgTAAcgtGgAAACCTGCC
|
33
|
+
AATCAaTGTTtgAAataGGAcaGgCTGAAACGATTCTTAAATGAAAAGCTTAAtgaCTTc
|
34
|
+
TTgCAtttttaTACATCACTGTTCAGGtAaGGCCAGTAAGGgCAGTATgAaGAAtAaGTA
|
35
|
+
ACAATtAATAATTATCATTATGGCCATTTGCTGtcTGCATAAtAaCAAACTGAATGATGT
|
36
|
+
CATCAGCCCTgTGCTCAGTTGACAgAACTGACAAGTAGGCACACaaTGTCAGTGTGATCC
|
37
|
+
ATGAAACCT
|
38
|
+
>isotig00008 gene=isogroup00003 length=1726 numContigs=6
|
39
|
+
AGGTTTCATGGATCACACTGACAtTGTGTGCCTACTTGTCAGTTcTGTCAACTGAGCAcA
|
40
|
+
GGGCTGATGACATCATTCAGTTTGttattATGCAggaCAGCAAATGGCCATAATGATAAT
|
41
|
+
TATTAaTTGTTACTtaTTCTtcATACTGCCcTTACTGGCCTtaCCTGAACAGTGATGTAt
|
42
|
+
caaaaTGcAAgAAGtcaTTAAGCTTTTCATTTAAGAATCGTTTCAGCctgTCCtaatTTt
|
43
|
+
cAAaCAtTGATTGGCAGGTTTCcacgTTAcaTAGCTTATCAAAAtGGAGCTGTTCTGTAA
|
44
|
+
AGCAAGaAAATCACaCATaTGCATGCATACAAACACACTCATATATAACAAtTTCTGAAT
|
45
|
+
ATCTtttCAGCTGCCCAAAGGTACTGGAATGCAAGCTCTGTTGGGTGAATTAAAAAaCCc
|
46
|
+
TCATTGTACTCTTTTATCATGGTCAGCGTAGCTGGAACCAGCAATGATGGTGATGTGGAT
|
47
|
+
TTGACAGTACCTAGACAAGAATTCAATAGCTGCTTTGTGTTGAAGTGGGTTCAACTTGCT
|
48
|
+
CAAGGCCATGACAGGATGACGACTAGACATtAGGATGAATTATCTGTTGCAGAGCTCTAT
|
49
|
+
AAaCCAGGCAGTtGTtAAAaCATCCATTGGGTGACCcTCACAGTCTACCAGGCACTGCAT
|
50
|
+
CGCAGAAGCAAAATGAAGTGTGTGTgATACCTTCATCTTATGAGAGTGGAATGGTATCAG
|
51
|
+
TGTGGCCTCTGTCAATTTGGCTTCAAGTCTTTGTTTTCTTGAAAGCTGAGAaGATCTTTC
|
52
|
+
AGTGGTTTCACACTGACCTCTGTACTTGACAATTCATGTGTATTTGCCAGCTCAGgAGGA
|
53
|
+
AGCCAGATTTTAATTACATTAACCAATGCTGACTTTTTTttGGACATGTGGTACATCTGC
|
54
|
+
AAGGAGGTGAAGGAAGCAGTTGGAGTTGCATGGATGTGGAATCTTGTTGATAGTCTGGCA
|
55
|
+
GTCTTATCCACAGATTATCCCAAAGCTTCTCCACATTGCCACATTCAGAACTTATTACAT
|
56
|
+
TGATAAACTTGAAAATtGCAGGAATCTAaCcAaGCACCcATCAaGGGAaTTTGTTTGTAT
|
57
|
+
AATtATtGAAaGCTGTGACcTTCTGATGTGACAGACTAATGTGAAaTAAAGGgCAgtaGa
|
58
|
+
TTaCCTTGTaaTGAACCttGTTATTGTTTGATTGTATCTAAtGTTTGCAaaTTTTAAGTT
|
59
|
+
GCATGCTACCAATTGAAACATAATTCTTTCTCTAttaatgggatataaaatacttctcct
|
60
|
+
taaaagggTGTgAaGACTcggCACAAAGAAACGTCtaTGCcGgtAaTCTGACCTAGTTTc
|
61
|
+
gAatGaGGTGTAACagAAGTgTtAGACACcACCAttGATCCcAGAAAATACACACACAGC
|
62
|
+
TTGCTACCGTCGGTAaTTAGACACTAGTGTACAGTCAaTACATACAGCTAcGgTCAATAC
|
63
|
+
CCACAaCACAgTGTAcATAGCAGCGaTGgACATCTCAGGTCTAGATAAAAGATAaCAAGG
|
64
|
+
TATCACGTTTCATtaCTGTCTGCATTTtGTAGCgaCAagAAGAAAAcgtCATTtGCAAGC
|
65
|
+
AaTGgAAAGTtAACTTTTTCaGAGCGcagCAcGCgggTTGGGGCAAGTCTTCCAAGCCTT
|
66
|
+
TAAGTtGACAtcTTGCCTTTGGCTATCCAGGgTGACAAGATGATACTAGCAGGTAgagtg
|
67
|
+
actaattgagccctgtgtgagaaaccaatgcagaatctagcctagt
|
68
|
+
>isotig00009 gene=isogroup00003 length=1827 numContigs=2
|
69
|
+
TAGCTGTGATCTAGTGGATCTGACTGGCCTTTTGATTATTTCAGCacGATTCTCAGACTA
|
70
|
+
CAGTTGTAAaCCTACTTCGACTACTACTACTActagtacTAACGGTGCAACGTTGTTATA
|
71
|
+
AGTTTGCCAAAGGTGAAACTTTAGCCTTAGGACtGTGTTTATTTTATTTGCAGTCGCATT
|
72
|
+
CgCCTAACTGTTTTCTGTTACTGGGTGCATTTAACTCACATTAATAGAGGATTTTtGACT
|
73
|
+
AGTtCcTAGAGAGTGGTGTTTCTGTTTTACCACCATGGCAAAAAAGGGAAaGCCTCGCCC
|
74
|
+
TGACCATAGGCCTCCTGCACACAACCCGCATTATGCTCATGATCCACCACCTTATTCACA
|
75
|
+
ACAGCAACCACCACTTCAACAGCAGAACTATGCACAACAAATGCATCATGGTGGAGGTGG
|
76
|
+
TGGAAATAGACAACATGCACGACcTAGACCTAGTCCACCTTCAGAAGTCAGTGACTGTGT
|
77
|
+
CAAGTACTCCCTTTTCTtGTATAACTGCATCTTTTGgaTTGtCGGCCTTttCTTTATtGC
|
78
|
+
AGCAGGTATCTGGgCATTTCACGATAGGGGTGTTTTTAATGAATTCCAGTCACTTAGTAC
|
79
|
+
CAATGAGGTCTCCTTTCTCACTGATCCTGTTATTTGGCTGTTCGTCCTCGGAGGTGTAGT
|
80
|
+
TTTCATGCTGGGAACCCTCGGATGTCTgGGGgCCCTCAGAGAAAaTATCTGCATGCTGAA
|
81
|
+
GTGTTTTAGCATAATCATGGGGCTTATACTGCTGCTGGAAATTGGAGGTGGATGTGCGAT
|
82
|
+
ATACTTCTATCGTGCACAGATTCAGGCACAGTTTCAAAAGTCCTTAACAGATGTGaCCAT
|
83
|
+
AACAGATTACAGAGAAAATGCTGATTTCCAGGATCTCATAGACGCATTACAATCCGGTCT
|
84
|
+
TTCTTGTTGTGGTGTCAATTCCTatGAAGACTGGGATAATAATATTTATTTCAACTGTAG
|
85
|
+
TGGTCCTGCCAATAACCCTGAAGCcttGTGGTGTGCCTTtCTCCTGTTGTATACCGGATC
|
86
|
+
AAGCAAGCGGAGTAGCCAACACCCAGTGCGGTTATGGAGTTCGTTCCCCCGAACAACAAA
|
87
|
+
ATACTTTCCACACAAAGATTTACACCACTGGCTGTGCGGATATGTTTACAATGTGGATTA
|
88
|
+
ATAGGTACCTATATTACATAGCAGGCATTGCTGGGGTCATTGTCTTGGTCGAGTtGTTTG
|
89
|
+
GATTCTGTTTTGCACATTCCCTCATCAACGACATCAAACGCCAAAAGGCCCGCTGGGCGC
|
90
|
+
ATCGATAATTCATTCCAGGATGTTGGTGgATGATGCTACTCAAGGGagAAGACTGACAGT
|
91
|
+
GCCTTTtGGTCAaTATCGTGTAGCATCAGGAAGGAGGTAGTACCTCCTCAACTAACCaTA
|
92
|
+
ACAGAATTTGTCCAGTTTGTAACATCGTCAAGAAATAAACAGACTTTTTTTACCATTAGG
|
93
|
+
ACgTGATAATACTACCACGTAACCTCTCAAAGCACAAAAAGCAAAAAGCAAATATCTCCT
|
94
|
+
TGTTTTAAAATTAGaagGTCTATCTCAGATAACAACCACAGAACATgTGGAGTTTTCCtT
|
95
|
+
TATGCTATCATAAAGATATAAATATATATAAAATTGAGGTAGcATCtTGGCTACCCACCA
|
96
|
+
AAATCATTTTTTTTCCAGTTTGaAACATCATGGAACATTTCAGAACAAAGATCATTTCAG
|
97
|
+
TCGTTACCACACTCAAGAgaTTGCTGTcGTCAaCaTTTtGtaGCTTTTtAAtGTCTTGAT
|
98
|
+
CTTCGTCGACATCGTCAATGTGTAAACTATTCTCGACGAGAGATTAGTGTCTAATACTGC
|
99
|
+
GGGTgATTTGATATAAATCTCACTTGG
|
100
|
+
>isotig00010 gene=isogroup00003 length=1650 numContigs=5
|
101
|
+
TGAATGAGAAAtGAAATTTAGCGAAGAAATCACCTTGTAAATTAAAAACTAAAATGGCTT
|
102
|
+
TCACACAAATTAaCAGTAAAtGgAGAATGTTTTTAAAGCAATATATGCAGTACAGCcATT
|
103
|
+
CATTGGAAAACAGTAAcAAAaTACATTTATCTTGTtcATTTTtACctCctGCAAaacTTA
|
104
|
+
cAaCcGTTAATTATGTAGATTGGATGGCACTAACAGGGTACTTGTCTTATCTGCCTATTG
|
105
|
+
GATAATGTGGcATTAATACTACTGTGTATGGGCACTGAGGCTGAGAGTGCAGTAAGTTtA
|
106
|
+
AAGGCATTGAAGACTCtCCCCGAaCcGCGtGCCGGGCTctGAAAAAGTtAaCTGCTCGCA
|
107
|
+
AaTtAcGTTTtCTtCTTGTCaCTaCAAAaTGCAGACATTaaTGAAACGTGATACCTTGTt
|
108
|
+
ATCTTTTATCTAGACCTGAGATGTCcAtCGCTGCTATgTACAcTGTGTTGTGGGTATTGA
|
109
|
+
CcgTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGT
|
110
|
+
GTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTA
|
111
|
+
GGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaa
|
112
|
+
gtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAA
|
113
|
+
AAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtac
|
114
|
+
TGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACA
|
115
|
+
AACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTA
|
116
|
+
ATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCC
|
117
|
+
AGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAG
|
118
|
+
ATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCT
|
119
|
+
cCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAA
|
120
|
+
GATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGA
|
121
|
+
TACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGC
|
122
|
+
AGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAG
|
123
|
+
AGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCA
|
124
|
+
AGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCC
|
125
|
+
ACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGG
|
126
|
+
TTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTT
|
127
|
+
TGTTCCTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAG
|
128
|
+
CACAGTGTGCAAAGctGCTATATATTGTCC
|
129
|
+
>isotig00011 gene=isogroup00003 length=1525 numContigs=6
|
130
|
+
ACATTCTTCAAGAGCTCTGCACCCACCAATCTAAAGTGACCAGCCAAGTGACTGACCTCA
|
131
|
+
GGGCACAGTTAGCAGCTTTGACCACAGGATGAGCTATGTAACAACTGAAtgaaTGGTGTT
|
132
|
+
CAtcGTTGATTGGGCAgTCAAAACAGCTGAATTTCTCTTGCGgAAGACATAAAGGCATTG
|
133
|
+
AAGACtcGCCcAAaccGtGTGcgcccTCTGAAAAaGTTAACTTTctGTTgCTTGCAaGTG
|
134
|
+
AAGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgAAACGTGATACcTtGTtATCT
|
135
|
+
TTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTGTGTTGTGGgTATTGACcGTA
|
136
|
+
GCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGTGTGTA
|
137
|
+
TTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTAGGTCA
|
138
|
+
GAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaagtatt
|
139
|
+
ttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAAAAtTT
|
140
|
+
TGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtacTGCCC
|
141
|
+
TTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACAAACAA
|
142
|
+
ATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTAATAAG
|
143
|
+
TTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCCAGACT
|
144
|
+
ATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAGATGTA
|
145
|
+
CCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCTcCTGA
|
146
|
+
GCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAAGATCT
|
147
|
+
TCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGATACCA
|
148
|
+
TTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGCAGTGC
|
149
|
+
CTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAGAGCTC
|
150
|
+
TGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCAAGTTG
|
151
|
+
AACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCCACATC
|
152
|
+
ACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGGTTTTT
|
153
|
+
TAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTTTGTTC
|
154
|
+
CTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAGCACAG
|
155
|
+
TGTGCAAAGctGCTATATATTGTCC
|
156
|
+
>isotig00001_f6
|
157
|
+
FRIYKNYALQYVSKDAHLSLLWSAVTHLGTQYFGKTPFVLTYILLTECAVESCMEACIETLINNYEKDKLLPLQYYTSYFLCRHLAKCDYSKTDKILNVCQRNLMAFLLKINPFHLLRHRLAPNRMQPYEKVNFLLLASDVLFVSLQNADSNETRYIVIFSWTDVHRCTVCTLCCGCLQLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITAPFFLRESSHRIMIIGFSNLEPLTKCVTELIFLVTKKTLVHMRERYINMSIRGRVVYLLPDSADNSQLFENTLCYISSSFKAPKPNQQNKKPNEQRNCMTNVNFSKIELLKHISQHISHRANRTELELTVHCTFHFFLYLKVTTKLHRKTKTSSLHFNHKNIFALITKKIRYTIRQTLPYLKPLRLCSTLSVLNCLYSTVLTRQISIIKISKIYYIHNLKDVECLSGVKAGAFQSGPLFESLQDCMSSSYRVVENRQFIIMDVKYETKRLTSVSLRLANEASSRVPACRMTNTYIYITVMRNAYLPKRCSYTRKGSTHSHLLKVDVVVHVVYFHHLHHDAFVVHSSAVEVVVAVVNKVVDHEHNAGCVQEAYGQGEAFPFLPWWNRNTTLELVKNPLLMVKCTQQKTVRRMRLQIKTQSGSFTFGKLITTLHRYSKVYNCSLRIVLKSKGQSDPLDHS
|
158
|
+
>isotig00002_f1
|
159
|
+
MRNEIRRNHLVNKLKWLSHKLTVNGECFSNICSTAIHWKTVTKYIYLVHFYLLQNLQPLIMIGWHQGTCLICLLDNVALILLCMGTEAESAVSLKALKTLPEPRAGLKSLLANYVFFLSLQNADINETYLVIFYLDLRCPSLLCTLCCGYPLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITGIDVSLCRVFTPFGEVFYIPLIEKELCFNWHATNFANIRYNQTITRFITRLIYCPLFHISLSHQKVTAFNNYTNKFPWVLGIPAIFKFINVISSECGNVEEALGSVDKTARLSTRFHIHATPTASFTSLQMYHMSKKKSALVNVIKIWLPPELANTHELSSTEVSVKPLKDLLSFQENKDLKPNQRPHYHSTLIRRYHTHFILLLRCSAWTVRVTQWMFQLPGLSSATDNSSCLVVILSWPASTHFNTKQLLNSCLGTVKSTSPSLLGSSYADHDKRVQGFFNSPNRACIPVPLGSKDIQKLLYMSVFVCMHMCDFLALQNSSILISYVTWKPANQCLKDRLKRFLNEKLNDFLHFYTSLFRGQGQYEEVTINNYHYGHLLSAQTECHQPCAQLTELTSRHTMSVSMKP
|
160
|
+
>isotig00008_f3
|
161
|
+
VSWITLTLCAYLSVLSTEHRADDIIQFVIMQDSKWPLLIVTYSSYCPYWPYLNSDVSKCKKSLSFSFKNRFSLSFSNIDWQVSTLHSLSKWSCSVKQENHTYACIQTHSYITISEYLFSCPKVLECKLCWVNKTLIVLFYHGQRSWNQQWCGFDSTTRIQLLCVEVGSTCSRPQDDDTLGIICCRALTRQLLKHPLGDPHSLPGTASQKQNEVCVIPSSYESGMVSVWPLSIWLQVFVFLKAEKIFQWFHTDLCTQFMCICQLRRKPDFNYINQCLFFGHVVHLQGGEGSSWSCMDVESCSGSLIHRLSQSFSTLPHSELITLINLKIAGIPSTHQGNLFVLLKAVTFCDRLMNKGQITLTLLLFDCICLQILSCMLPIETFFLYWDIKYFSLKGCEDSAQRNVYAGNLTFRMRCNRSVRHHHSQKIHTQLATVGNTLVYSQYIQLRSIPTTQCTQRWTSQVIKDNKVSRFITVCILRQEENVICKQWKVNFFRAQHAGWGKSSKPLSHLAFGYPGQDDTSRSDLSPVETNAESSL
|
162
|
+
>isotig00010_f3
|
163
|
+
NEKNLAKKSPCKLKTKMAFTQINSKWRMFLKQYMQYSHSLENSNKIHLSCSFLPPAKLTTVNYVDWMALTGYLSYLPIGCGINTTVYGHGECSKFKGIEDSPRTACRALKKLTARKLRFLLVTTKCRHNVIPCYLLSRPEMSIAAMYTVLWVLTVAVCIDCTLVSNYRRQAVCVFSGINGGVHFCYTSFETRSDYRHRRFFVPSLHTLLRRSILYPINRERIMFQLVACNLKFCKHIQSNNNKVHYKVNLLPFISHSVTSEGHSFQLYKQIPLMGAWLDSCNFQVYQCNKFMWQCGRSFGIICGDCQTINKIPHPCNSNCFLHLLADVPHVQKKVSIGCNNLASSAGKYTIVKYRGQCETTERSSQLSRKQRLEAKLTEATLIPFHSHKMKVSHTLHFASAMQCLVDCEGHPMDVLTTAWFIELCNRFILMSSRHPVMALSKLNPLQHKAAIEFLSRYCQIHITIIAWFQLRPKSTMRVFFTQQSLHSSTFGQLISILFLVCLSKSDILSRFVCSQRAQCAKLLYIV
|
164
|
+
>isotig00012_f3
|
165
|
+
LKVEKLVTIFHSSIFGSYILIWVSPCTDEVYTVESLQLYIKGIEDSPKPCAPSEKVNFPLLASEVFFLSLQNADSNETYLVIFYLDLRCPRCYVHCVVGIDRSCMYLYTSVLPTVASCVCIFWDQWWCLTLLLHLIRNVRLPATFLCAESSHPFKEKYFISHRKNYVSIGSMQLKILQTLDTIKQQGSLQGSTALYFTLVCHIRRSQLSIIIQTNSLDGCLVRFLQFSSLSMVLNVAMWKKLWDNLWIRLPDYQQDSTSMQLQLLPSPPCRCTTCPKKSQHWLMLKSGFLLSWQIHMNCQVQRSVNHKIFSAFKKTKTSQIDRGHTDTIPLSDEGITHTSFCFCDAVPGRLGSPNGCFNNCLVYRALQQIIHPNVSSSCHGLEQVEPTSTQSSYILVVLSNPHHHHCLVPATLTMIKEYNEGFLIHPTELAFQYLWAADIHFVPRMPVKIHSESLRLFATSTVCKAAIYC
|
166
|
+
>isotig00015_f4
|
167
|
+
RRPYVARVKVINQWLSLELFLQIKVSTKKITVFEGGHRYERSTGQTRTILTAFHPISRTRIEPQTILHCGRTGHSSWDRGVEHTFTTMLPLYNTSFVSSMLPWCMEFCNQQKSLSSVSIATSIRAASMELQGVNVTFYCSVVRNHVCQHGVALKGCEDSRKKNVCRSDLVSNEVQKCTLPSIPENTNTACYRRLDTSVQSIHTATVNTHNTVYIAAMDISGLDKRQGITFHCLHFVVTRRKRNLRAVNFFRARHAVRGESSMPLNLLHSQPQCPYTVVLMPHYPIGRDKYPVSAIQSTLTVVSFAGGKNEQDKCILLLFSNEWLYCIYCFKNILHLLLICVKAILVFNLQGDFFAKFHFSF
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
|
3
|
+
require 'npsearch/arg_validator'
|
4
|
+
|
5
|
+
# Class to test the how well the CLI arguments are validated.
|
6
|
+
class TestInputArgumentValidator < Minitest::Test
|
7
|
+
def setup
|
8
|
+
@c = NpSearch::ArgumentsValidators
|
9
|
+
@opt = { num_threads: 1, min_orf_length: 30 }
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_assert_file_present
|
13
|
+
@c.send(:assert_file_present, 'existing file',
|
14
|
+
'test/files/genetic.fa', 1)
|
15
|
+
assert_raises(SystemExit) do
|
16
|
+
@c.send(:assert_file_present, 'non-existing file',
|
17
|
+
'test/files/nope_dont_exist.fa', 1)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_assert_input_file_not_empty
|
22
|
+
f = 'test/files/genetic.fa'
|
23
|
+
@c.send(:assert_input_file_not_empty, f)
|
24
|
+
f = 'test/files/empty_file.fa'
|
25
|
+
assert_raises(SystemExit) { @c.send(:assert_input_file_not_empty, f) }
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_assert_input_file_probably_fasta
|
29
|
+
f = 'test/files/genetic.fa'
|
30
|
+
@c.send(:assert_input_file_probably_fasta, f)
|
31
|
+
f = 'test/files/not_fasta.fa'
|
32
|
+
assert_raises(SystemExit) { @c.send(:assert_input_file_probably_fasta, f) }
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_assert_input_sequence
|
36
|
+
f = 'test/files/genetic.fa'
|
37
|
+
@c.send(:assert_input_sequence, f)
|
38
|
+
f = 'test/files/protein.fa'
|
39
|
+
@c.send(:assert_input_sequence, f)
|
40
|
+
f = 'test/files/mixed_content.fa'
|
41
|
+
assert_raises(SystemExit) { @c.send(:assert_input_sequence, f) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_check_num_threads
|
45
|
+
[1, 50, 300].each do |t|
|
46
|
+
@c.send(:check_num_threads, t)
|
47
|
+
end
|
48
|
+
assert_equal(1, @c.send(:check_num_threads, -3))
|
49
|
+
end
|
50
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'minitest/autorun'
|