npsearch 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +3 -2
- data/Rakefile +14 -5
- data/bin/npsearch +45 -33
- data/lib/npsearch/arg_validator.rb +70 -241
- data/lib/npsearch/output.rb +6 -5
- data/lib/npsearch/pool.rb +1 -1
- data/lib/npsearch/scoresequence.rb +62 -60
- data/lib/npsearch/sequence.rb +12 -9
- data/lib/npsearch/signalp.rb +29 -10
- data/lib/npsearch/version.rb +1 -1
- data/lib/npsearch.rb +27 -52
- data/npsearch.gemspec +2 -1
- data/templates/contents.slim +3 -3
- data/test/files/mixed_content.fa +167 -0
- data/test/test_argument_validator.rb +50 -0
- data/test/test_helper.rb +1 -0
- data/test/test_sequence.rb +81 -0
- data/test/test_sequence_scoring.rb +142 -0
- metadata +27 -17
- data/test/files/1_protein.fa +0 -204
- data/test/files/2_orf.fa +0 -1330
- data/test/files/3_signalp_out.txt +0 -667
- data/test/files/4_secretome.fa +0 -6
- data/test/files/5_output.fa +0 -6
- data/test/files/5_output.html +0 -37
- data/test/test_np_search.rb +0 -122
@@ -1,108 +1,110 @@
|
|
1
|
-
|
2
|
-
require 'tempfile'
|
3
|
-
|
1
|
+
# Top level module / namespace.
|
4
2
|
module NpSearch
|
5
3
|
# A class to score the Sequences
|
6
4
|
class ScoreSequence
|
7
5
|
class << self
|
8
|
-
DI_CLV
|
9
|
-
MONO_NP_CLV_2 = '[KR]..R'
|
10
|
-
MONO_NP_CLV_4 = '[KR]....R'
|
11
|
-
MONO_NP_CLV_6 = '[KR]......R'
|
6
|
+
DI_CLV = 'KR|RR|KK'.freeze
|
7
|
+
MONO_NP_CLV_2 = '[KR]..R'.freeze
|
8
|
+
MONO_NP_CLV_4 = '[KR]....R'.freeze
|
9
|
+
MONO_NP_CLV_6 = '[KR]......R'.freeze
|
12
10
|
NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
|
13
|
-
"(#{MONO_NP_CLV_6})"
|
11
|
+
"(#{MONO_NP_CLV_6})".freeze
|
14
12
|
|
15
|
-
def run(sequence)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
acidic_spacers
|
13
|
+
# Scores +sequence+ in place by running every scoring step in order.
#
# sequence - object whose potential peptides and score are updated.
# opt      - options Hash; only opt[:temp_dir] is read here and it is
#            handed on to the similarity step.
def run(sequence, opt)
  split_into_potential_neuropeptides(sequence)
  [:count_np_cleavage_sites, :count_c_terminal_glycines].each do |scorer|
    send(scorer, sequence)
  end
  np_similarity(sequence, opt[:temp_dir])
  acidic_spacers(sequence)
end
|
23
20
|
|
24
21
|
private
|
25
22
|
|
26
|
-
def
|
23
|
+
# Splits sequence.seq at the NP_CLV cleavage motifs and stores the pieces
# on sequence.potential_cleaved_nps as an Array of Hashes. Each Hash holds
# the peptide (:np) plus whichever cleavage motif matched before (*_st)
# and after (*_end) it; groups that did not match are nil.
def split_into_potential_neuropeptides(sequence)
  keys = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
            di_clv_end mono_2_clv_end mono_4_clv_end mono_6_clv_end)
         .map(&:to_sym)
  matches = sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
  sequence.potential_cleaved_nps = matches.map do |captures|
    Hash[keys.zip(captures)]
  end
end
|
34
31
|
|
35
|
-
def count_np_cleavage_sites
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
32
|
+
# Adds the cleavage-site bonuses for every potential peptide, based on the
# motif captured directly after the peptide (dibasic and mono-basic).
def count_np_cleavage_sites(sequence)
  peptides = sequence.potential_cleaved_nps
  return if peptides.empty?
  peptides.each do |peptide|
    count_dibasic_np_clv(sequence, peptide[:di_clv_end])
    mono_sites = peptide.values_at(:mono_2_clv_end, :mono_4_clv_end,
                                   :mono_6_clv_end)
    count_mono_basic_np_clv(sequence, *mono_sites)
  end
end
|
42
40
|
|
43
|
-
def count_dibasic_np_clv(dibasic_clv)
|
41
|
+
# Adds the dibasic cleavage bonus to sequence.score: 0.09 for 'KR',
# 0.05 for 'RR' or 'KK', nothing for any other value (including nil).
def count_dibasic_np_clv(sequence, dibasic_clv)
  bonus = { 'KR' => 0.09, 'RR' => 0.05, 'KK' => 0.05 }[dibasic_clv]
  sequence.score += bonus if bonus
end
|
51
49
|
|
52
|
-
def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
|
50
|
+
# Adds 0.02 to sequence.score when at least one of the three mono-basic
# cleavage captures is present (non-nil); otherwise leaves the score alone.
def count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv)
  mono_sites = [mono_2_clv, mono_4_clv, mono_6_clv]
  sequence.score += 0.02 unless mono_sites.all?(&:nil?)
end
|
56
54
|
|
57
55
|
# Counts the number of C-terminal glycines
|
58
|
-
def count_c_terminal_glycines
|
59
|
-
|
60
|
-
|
61
|
-
|
56
|
+
# Rewards potential peptides that end in a glycine. Per peptide, the first
# matching rule wins:
#   ends in FG and is followed by KR         -> +0.40
#   ends in G and is followed by KR          -> +0.25
#   ends in G, GK or GR (any following site) -> +0.10
def count_c_terminal_glycines(sequence)
  peptides = sequence.potential_cleaved_nps
  return if peptides.empty?
  peptides.each do |peptide|
    np = peptide[:np]
    kr_follows = peptide[:di_clv_end] == 'KR'
    bonus = if kr_follows && np =~ /FG$/
              0.40
            elsif kr_follows && np =~ /G$/
              0.25
            elsif np =~ /G$|GK$|GR$/
              0.10
            end
    sequence.score += bonus if bonus
  end
end
|
67
68
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
69
|
+
# Adds 0.10 to sequence.score for every acidic spacer that is detected.
# An acidic spacer is a potential cleaved peptide that is at most 25% of
# the precursor length (sequence.seq, i.e. not including the signal
# peptide) and in which more than 50% of the residues are D or E.
#
# The ratios are computed with Integer#fdiv: plain Integer division
# truncates to 0 or 1, which made both thresholds meaningless.
def acidic_spacers(sequence)
  precursor_length = sequence.seq.length
  sequence.potential_cleaved_nps.each do |e|
    np_length = e[:np].length
    # Too long to be a spacer.
    next if np_length.fdiv(precursor_length) > 0.25
    acidic_fraction = e[:np].count('DE').fdiv(np_length)
    sequence.score += 0.10 if acidic_fraction > 0.5
  end
end
|
75
78
|
|
76
|
-
def np_similarity
|
77
|
-
results
|
78
|
-
results.
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
if
|
83
|
-
|
79
|
+
# Rewards sequences whose potential peptides resemble each other.
#
# sequence - object whose score is increased.
# temp_dir - directory handed to run_cdhit for its temporary files.
# results  - cd-hit cluster report as a String; when nil it is produced by
#            run_cdhit (callers may pass a report directly instead).
#
# The report is split on its ">Cluster N" header lines; every cluster with
# more than one line adds 0.15 per line to the score. Nothing is scored
# when no report is available (run_cdhit returns nil when there are no
# peptides to cluster), instead of failing on nil.
def np_similarity(sequence, temp_dir, results = nil)
  results = run_cdhit(sequence, temp_dir) if results.nil?
  return if results.nil?
  results.split(/^>Cluster \d+\n/).each do |cluster|
    no_of_seqs_in_cluster = cluster.split("\n").length
    next unless no_of_seqs_in_cluster > 1
    sequence.score += (0.15 * no_of_seqs_in_cluster)
  end
end
|
87
90
|
|
88
|
-
def
|
89
|
-
f = Tempfile.new('
|
90
|
-
fo = Tempfile.new('
|
91
|
-
|
92
|
-
`
|
93
|
-
IO.read(fo.path)
|
94
|
-
ensure
|
95
|
-
f.unlink
|
96
|
-
fo.unlink
|
91
|
+
# Clusters the potential peptides of +sequence+ with the external cd-hit
# program and returns the contents of the "<output>.clstr" report it
# writes, or nil when there are no peptides to cluster.
#
# sequence - object providing potential_cleaved_nps.
# temp_dir - directory in which the temporary input/output files are made.
#
# cd-hit is invoked with an argument list (no shell), so paths containing
# spaces or shell metacharacters are passed through untouched. Its stdout
# is discarded, as it was when the output of the backticks was ignored.
# Both temp files are always closed and removed, including on the early
# return and on errors.
def run_cdhit(sequence, temp_dir)
  f = Tempfile.new('clust', temp_dir)
  fo = Tempfile.new('clust_out', temp_dir)
  return unless write_potential_peptides_to_tempfile(sequence, f)
  system('cd-hit', '-c', '0.5', '-n', '3', '-l', '4', '-i', f.path,
         '-o', fo.path, out: File::NULL)
  IO.read("#{fo.path}.clstr")
ensure
  [f, fo].compact.each do |tmp|
    tmp.close unless tmp.closed?
    tmp.unlink
  end
end
|
98
98
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
-
|
99
|
+
# Writes the potential peptides of +sequence+ to +tempfile+ as FASTA
# records named seq0, seq1, ... and closes the file.
#
# Returns false (writing nothing and leaving the file open) when there are
# no potential peptides, true otherwise.
def write_potential_peptides_to_tempfile(sequence, tempfile)
  peptides = sequence.potential_cleaved_nps
  return false if peptides.empty?
  records = peptides.each_with_index.map do |peptide, idx|
    ">seq#{idx}\n#{peptide[:np]}\n"
  end
  tempfile.write(records.join)
  tempfile.close
  true
end
|
107
109
|
end
|
108
110
|
end
|
data/lib/npsearch/sequence.rb
CHANGED
@@ -1,23 +1,26 @@
|
|
1
|
+
# Top level module / namespace.
|
1
2
|
module NpSearch
|
3
|
+
# Adapted from GeneValidator's Query Class.
|
2
4
|
# A class to hold sequence data
|
3
5
|
class Sequence
|
4
|
-
DI_NP_CLV
|
5
|
-
MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'
|
6
|
+
DI_NP_CLV = 'KR|KK|RR'.freeze
|
7
|
+
MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'.freeze
|
6
8
|
|
7
9
|
attr_reader :id
|
10
|
+
attr_reader :defline
|
8
11
|
attr_reader :signalp
|
9
12
|
attr_reader :seq
|
10
13
|
attr_reader :html_seq
|
11
|
-
attr_reader :signalp_output
|
12
14
|
attr_reader :translated_frame
|
13
15
|
attr_accessor :score
|
14
16
|
attr_accessor :potential_cleaved_nps
|
15
17
|
|
16
|
-
def initialize(
|
17
|
-
@id =
|
18
|
-
|
19
|
-
|
20
|
-
@
|
18
|
+
def initialize(entry, sp, frame = nil)
|
19
|
+
@id = entry.entry_id
|
20
|
+
@defline = entry.definition
|
21
|
+
sp_cleavage_site_idx = sp[:ymax_pos].to_i - 1
|
22
|
+
@signalp = sp[:orf][0..(sp_cleavage_site_idx - 1)]
|
23
|
+
@seq = sp[:orf][sp_cleavage_site_idx..-1]
|
21
24
|
@html_seq = format_seq_for_html
|
22
25
|
@translated_frame = frame
|
23
26
|
@score = 0
|
@@ -27,7 +30,7 @@ module NpSearch
|
|
27
30
|
def format_seq_for_html
|
28
31
|
seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
|
29
32
|
seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
|
30
|
-
seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target R
|
33
|
+
seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target 'R'
|
31
34
|
seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
|
32
35
|
seq.gsub!('G<span class=np_clv>',
|
33
36
|
'<span class=glycine>G</span><span class=np_clv>')
|
data/lib/npsearch/signalp.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'forwardable'
|
2
|
-
require 'tempfile'
|
3
2
|
|
3
|
+
# Top level module / namespace.
|
4
4
|
module NpSearch
|
5
5
|
# A class to hold sequence data
|
6
6
|
class Signalp
|
@@ -10,15 +10,34 @@ module NpSearch
|
|
10
10
|
|
11
11
|
# Runs the external signalp program on +seq+ (and, per setup_analysis, on
# the ORFs derived from it) and returns the result with the highest :d
# value as a Hash keyed by the signalp short-format column names plus
# :orf, the sequence the row was computed for.
#
# Rows are paired with their sequence by position, and :d values are
# compared as the Strings signalp printed.
def analyse_sequence(seq)
  keys = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
            sp dmaxcut networks orf).map(&:to_sym)
  data = setup_analysis(seq)
  command = "echo \"#{data[:fasta]}\n\" | #{opt[:signalp_path]} -t euk " \
            '-f short -U 0.34 -u 0.34'
  # Lines starting with '#' are signalp's header/comment lines.
  rows = `#{command}`.split("\n").reject { |row| row[0] == '#' }
  orf_results = rows.each_with_index.map do |row, idx|
    Hash[keys.zip("#{row} #{data[:seq][idx]}".split)]
  end
  orf_results.sort_by { |result| result[:d] }.reverse.first
end
|
25
|
+
|
26
|
+
# Builds the input for signalp as a Hash with :seq (Array of sequences)
# and :fasta (the same sequences as FASTA text).
#
# For protein input only +seq+ itself is analysed. Otherwise +seq+ is
# analysed together with every sub-sequence starting at an 'M'.
def setup_analysis(seq)
  return { seq: [seq], fasta: ">seq\n#{seq}" } if opt[:type] == :protein
  orfs = [seq] + seq.scan(/(?=(M\w+))./).flatten
  { seq: orfs, fasta: create_orf_fasta(orfs) }
end
|
36
|
+
|
37
|
+
# Returns the sequences in +m_orf+ as FASTA text; each record is named
# after its position in the Array (0, 1, 2, ...).
def create_orf_fasta(m_orf)
  m_orf.each_with_index.each_with_object('') do |(orf, position), fasta|
    fasta << ">#{position}\n#{orf}\n"
  end
end
|
23
42
|
end
|
24
43
|
end
|
data/lib/npsearch/version.rb
CHANGED
data/lib/npsearch.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'bio'
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
|
-
|
4
|
+
require 'npsearch/arg_validator'
|
5
5
|
require 'npsearch/output'
|
6
6
|
require 'npsearch/pool'
|
7
7
|
require 'npsearch/scoresequence'
|
@@ -11,33 +11,39 @@ require 'npsearch/signalp'
|
|
11
11
|
# Top level module / namespace.
|
12
12
|
module NpSearch
|
13
13
|
class <<self
|
14
|
-
MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
|
15
|
-
|
16
14
|
attr_accessor :opt
|
17
15
|
attr_accessor :sequences
|
18
16
|
attr_reader :sorted_sequences
|
19
17
|
|
20
18
|
# Prepares a run: validates +opt+, resets the collected sequences, creates
# a thread pool when more than one thread is requested, makes sure the
# temporary directory exists and extracts ORFs for genetic input.
def init(opt)
  @opt = ArgumentsValidators.run(opt)
  @sequences, @sorted_sequences = [], nil
  threads = @opt[:num_threads]
  @pool = Pool.new(threads) if threads > 1
  FileUtils.mkdir_p(@opt[:temp_dir])
  extract_orf if @opt[:type] == :genetic
end
|
28
26
|
|
29
27
|
# Analyses the input, ranks the collected sequences by descending score,
# writes the FASTA and HTML output and removes the temporary directory.
# Protein input is read directly; any other type is read from the
# extracted ORF file (@opt[:orf]).
def run
  input_file = if @opt[:type] == :protein
                 @opt[:input_file]
               else
                 @opt[:orf]
               end
  iterate_input_file(input_file)
  @sorted_sequences = @sequences.sort_by(&:score).reverse
  Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
  Output.to_html(@opt[:input_file])
  remove_temp_dir
end
|
35
35
|
|
36
36
|
private
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
# Uses getorf from the EMBOSS package to extract all ORFs from +input+
# into "input.orf.fa" inside the temporary directory; the path of that
# file is stored in @opt[:orf].
#
# input   - path of the sequence file to scan (defaults to the input file).
# minsize - value passed to getorf's -minsize option.
#
# getorf is started with an argument list rather than through a shell, so
# file names containing spaces or shell metacharacters are safe. All of its
# output is discarded. Returns what Kernel#system returns.
def extract_orf(input = @opt[:input_file], minsize = 90)
  @opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
  system('getorf', '-sequence', input.to_s, '-outseq', @opt[:orf],
         '-minsize', minsize.to_s, out: File::NULL, err: File::NULL)
end
|
44
|
+
|
45
|
+
def iterate_input_file(input_file)
|
46
|
+
Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
|
41
47
|
if @opt[:num_threads] > 1
|
42
48
|
@pool.schedule(entry) { |e| initialise_seqs(e) }
|
43
49
|
else
|
@@ -48,49 +54,18 @@ module NpSearch
|
|
48
54
|
end
|
49
55
|
|
50
56
|
# Processes one FASTA +entry+: skips it when its amino acid sequence is
# longer than @opt[:max_seq_length] or when signalp reports 'N' for it;
# otherwise builds a Sequence, scores it and appends it to @sequences.
def initialise_seqs(entry)
  return if entry.aaseq.length > @opt[:max_seq_length]
  signalp = Signalp.analyse_sequence(entry.aaseq)
  return if signalp[:sp] == 'N'
  Sequence.new(entry, signalp).tap do |scored|
    ScoreSequence.run(scored, @opt)
    @sequences << scored
  end
  @sequences
end
|
65
65
|
|
66
|
-
def
|
67
|
-
(
|
68
|
-
|
69
|
-
orfs = translated_seq.to_s.scan(/(?=(M\w{#{MIN_ORF_SIZE},}))./).flatten
|
70
|
-
initialise_orfs(id, orfs, f)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def initialise_orfs(id, orfs, frame)
|
75
|
-
orfs.each do |orf|
|
76
|
-
sp = Signalp.analyse_sequence(orf)
|
77
|
-
next if sp[:sp] == 'N'
|
78
|
-
seq = Sequence.new(id, orf, sp, frame)
|
79
|
-
ScoreSequence.run(seq)
|
80
|
-
@sequences << seq
|
81
|
-
# The remaining ORF in this frame are simply shorter versions of the
|
82
|
-
# same orf so break loop once signal peptide is found.
|
83
|
-
break if sp[:sp] == 'Y'
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
def guess_sequence_type
|
88
|
-
fasta_content = IO.binread(@opt[:input_file])
|
89
|
-
# removing non-letter and ambiguous characters
|
90
|
-
cleaned_sequence = fasta_content.gsub(/[^A-Z]|[NX]/i, '')
|
91
|
-
return nil if cleaned_sequence.length < 10 # conservative
|
92
|
-
type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
|
93
|
-
(type == Bio::Sequence::NA) ? :nucleotide : :protein
|
66
|
+
# Deletes the temporary directory (@opt[:temp_dir]) and everything in it,
# if that directory exists.
def remove_temp_dir
  temp_dir = @opt[:temp_dir]
  FileUtils.rm_rf(temp_dir) if File.directory?(temp_dir)
end
|
95
70
|
end
|
96
71
|
end
|
data/npsearch.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
|
|
16
16
|
' For more information: https://github.com/wurmlab/npsearch'
|
17
17
|
s.summary = 'Search for neuropeptides based on the common' \
|
18
18
|
' neuropeptides markers'
|
19
|
-
s.homepage = 'https://github.com/
|
19
|
+
s.homepage = 'https://github.com/wurmlab/npsearch'
|
20
20
|
s.license = 'AGPL'
|
21
21
|
|
22
22
|
s.files = `git ls-files -z`.split("\x0")
|
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
s.add_development_dependency 'bundler', '~> 1.6'
|
29
29
|
s.add_development_dependency 'rake', '~>10.3'
|
30
30
|
s.add_development_dependency 'coveralls'
|
31
|
+
s.add_development_dependency 'minitest', '~> 5.4'
|
31
32
|
|
32
33
|
s.add_dependency 'bio', '~> 1.4'
|
33
34
|
s.add_dependency 'slim', '~> 3.0'
|
data/templates/contents.slim
CHANGED
@@ -29,9 +29,9 @@ html lang="en"
|
|
29
29
|
p.sequence
|
30
30
|
span.id
|
31
31
|
- if @opt[:type] == :protein
|
32
|
-
| >#{seq.
|
32
|
+
| >#{seq.defline}
|
33
33
|
- elsif @opt[:type] == :nucleotide
|
34
|
-
| >#{seq.
|
34
|
+
| >#{seq.defline}-(frame:#{seq.translated_frame})
|
35
35
|
br
|
36
36
|
span.seq== seq.html_seq
|
37
37
|
br
|
@@ -50,5 +50,5 @@ html lang="en"
|
|
50
50
|
a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
|
51
51
|
br
|
52
52
|
| This page was created by
|
53
|
-
a href="https://github.com/wurmlab/
|
53
|
+
a href="https://github.com/wurmlab/npsearch" target="_blank" NpSearch
|
54
54
|
| v#{NpSearch::VERSION}
|
@@ -0,0 +1,167 @@
|
|
1
|
+
>isotig00006 gene=isogroup00003 length=1747 numContigs=6
|
2
|
+
AGTTAAAAGTTGAAAAATTGGTGACCATATTTTGACACTCTAGCATATTTGGGAGCTATA
|
3
|
+
TACTGATTTGGGTTTCACCATGCACAGATGAGGTATATACATAAGTTGAAAGCCTGCAGC
|
4
|
+
TCTATATTAAAGGCATTGAAGACtcGCCcAAaccgtgTGcgcccTCTGAAAAaGTTAACT
|
5
|
+
TTCcGTTgCTTGCAaGTGAAGTTTtcTtCTTGTCGCTACAAAATGCAGACAGTAATGAAA
|
6
|
+
|
7
|
+
>isotig00007 gene=isogroup00003 length=1749 numContigs=5
|
8
|
+
TGTGTGTGTGTGGTGCTTCCccTCTAGGGCTGTAAATTTCAAAGGAACCTTGCGCAAGAA
|
9
|
+
CAGtAGCTTGCGaCGTTTTTCAAaaCCAGAGGTTCTGAACTGAACTGTACTGACTACTGT
|
10
|
+
AGGGtacTTAAaGGCATTGAAGACTCGCCcAAaCCatgTGCCGCGctttGAAAAAGTTAA
|
11
|
+
CTTTCCGTTGCTTGCAAATGAcGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgA
|
12
|
+
AACGTGATACcTtGTtATCTTTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTG
|
13
|
+
TGTTGTGGgTATTGACcGTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGAC
|
14
|
+
GGTAGCAAGCTGTGTGTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACA
|
15
|
+
CCtCAtTcGAAACTAGGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACA
|
16
|
+
cccttttaaggagaagtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGG
|
17
|
+
TAGCATGCAACTTAAAAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATT
|
18
|
+
ACAAGGttAAtCtacTGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCT
|
19
|
+
TtCAaTAaTTATACAAACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTC
|
20
|
+
AAGTTTATCAATGTAATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCT
|
21
|
+
GTGGATAAGACTGCCAGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCC
|
22
|
+
TTCACCTCCTTGCAGATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATT
|
23
|
+
AAAATCTGGCTTCCTcCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGT
|
24
|
+
GTGAAACCACTGAAAGATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGA
|
25
|
+
CAGAGGCCACACTGATACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATT
|
26
|
+
TTGCTTCTGCGATGCAGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAa
|
27
|
+
CTGCCtGGTTtAtAGAGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTG
|
28
|
+
TCATGGCCTTGAGCAAGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTA
|
29
|
+
GGTACTGTcAAATCCACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAA
|
30
|
+
AGAGTACAATGAGGGTTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGC
|
31
|
+
AGCTGaaaaGATATTCAGAAaTTGTTATATATGAGTGTGTTTGTATGCATGCAtATGtGT
|
32
|
+
GATTTtCTtGCTTTACAGAACAGCTCCaTTTTGATAAGCTAtgTAAcgtGgAAACCTGCC
|
33
|
+
AATCAaTGTTtgAAataGGAcaGgCTGAAACGATTCTTAAATGAAAAGCTTAAtgaCTTc
|
34
|
+
TTgCAtttttaTACATCACTGTTCAGGtAaGGCCAGTAAGGgCAGTATgAaGAAtAaGTA
|
35
|
+
ACAATtAATAATTATCATTATGGCCATTTGCTGtcTGCATAAtAaCAAACTGAATGATGT
|
36
|
+
CATCAGCCCTgTGCTCAGTTGACAgAACTGACAAGTAGGCACACaaTGTCAGTGTGATCC
|
37
|
+
ATGAAACCT
|
38
|
+
>isotig00008 gene=isogroup00003 length=1726 numContigs=6
|
39
|
+
AGGTTTCATGGATCACACTGACAtTGTGTGCCTACTTGTCAGTTcTGTCAACTGAGCAcA
|
40
|
+
GGGCTGATGACATCATTCAGTTTGttattATGCAggaCAGCAAATGGCCATAATGATAAT
|
41
|
+
TATTAaTTGTTACTtaTTCTtcATACTGCCcTTACTGGCCTtaCCTGAACAGTGATGTAt
|
42
|
+
caaaaTGcAAgAAGtcaTTAAGCTTTTCATTTAAGAATCGTTTCAGCctgTCCtaatTTt
|
43
|
+
cAAaCAtTGATTGGCAGGTTTCcacgTTAcaTAGCTTATCAAAAtGGAGCTGTTCTGTAA
|
44
|
+
AGCAAGaAAATCACaCATaTGCATGCATACAAACACACTCATATATAACAAtTTCTGAAT
|
45
|
+
ATCTtttCAGCTGCCCAAAGGTACTGGAATGCAAGCTCTGTTGGGTGAATTAAAAAaCCc
|
46
|
+
TCATTGTACTCTTTTATCATGGTCAGCGTAGCTGGAACCAGCAATGATGGTGATGTGGAT
|
47
|
+
TTGACAGTACCTAGACAAGAATTCAATAGCTGCTTTGTGTTGAAGTGGGTTCAACTTGCT
|
48
|
+
CAAGGCCATGACAGGATGACGACTAGACATtAGGATGAATTATCTGTTGCAGAGCTCTAT
|
49
|
+
AAaCCAGGCAGTtGTtAAAaCATCCATTGGGTGACCcTCACAGTCTACCAGGCACTGCAT
|
50
|
+
CGCAGAAGCAAAATGAAGTGTGTGTgATACCTTCATCTTATGAGAGTGGAATGGTATCAG
|
51
|
+
TGTGGCCTCTGTCAATTTGGCTTCAAGTCTTTGTTTTCTTGAAAGCTGAGAaGATCTTTC
|
52
|
+
AGTGGTTTCACACTGACCTCTGTACTTGACAATTCATGTGTATTTGCCAGCTCAGgAGGA
|
53
|
+
AGCCAGATTTTAATTACATTAACCAATGCTGACTTTTTTttGGACATGTGGTACATCTGC
|
54
|
+
AAGGAGGTGAAGGAAGCAGTTGGAGTTGCATGGATGTGGAATCTTGTTGATAGTCTGGCA
|
55
|
+
GTCTTATCCACAGATTATCCCAAAGCTTCTCCACATTGCCACATTCAGAACTTATTACAT
|
56
|
+
TGATAAACTTGAAAATtGCAGGAATCTAaCcAaGCACCcATCAaGGGAaTTTGTTTGTAT
|
57
|
+
AATtATtGAAaGCTGTGACcTTCTGATGTGACAGACTAATGTGAAaTAAAGGgCAgtaGa
|
58
|
+
TTaCCTTGTaaTGAACCttGTTATTGTTTGATTGTATCTAAtGTTTGCAaaTTTTAAGTT
|
59
|
+
GCATGCTACCAATTGAAACATAATTCTTTCTCTAttaatgggatataaaatacttctcct
|
60
|
+
taaaagggTGTgAaGACTcggCACAAAGAAACGTCtaTGCcGgtAaTCTGACCTAGTTTc
|
61
|
+
gAatGaGGTGTAACagAAGTgTtAGACACcACCAttGATCCcAGAAAATACACACACAGC
|
62
|
+
TTGCTACCGTCGGTAaTTAGACACTAGTGTACAGTCAaTACATACAGCTAcGgTCAATAC
|
63
|
+
CCACAaCACAgTGTAcATAGCAGCGaTGgACATCTCAGGTCTAGATAAAAGATAaCAAGG
|
64
|
+
TATCACGTTTCATtaCTGTCTGCATTTtGTAGCgaCAagAAGAAAAcgtCATTtGCAAGC
|
65
|
+
AaTGgAAAGTtAACTTTTTCaGAGCGcagCAcGCgggTTGGGGCAAGTCTTCCAAGCCTT
|
66
|
+
TAAGTtGACAtcTTGCCTTTGGCTATCCAGGgTGACAAGATGATACTAGCAGGTAgagtg
|
67
|
+
actaattgagccctgtgtgagaaaccaatgcagaatctagcctagt
|
68
|
+
>isotig00009 gene=isogroup00003 length=1827 numContigs=2
|
69
|
+
TAGCTGTGATCTAGTGGATCTGACTGGCCTTTTGATTATTTCAGCacGATTCTCAGACTA
|
70
|
+
CAGTTGTAAaCCTACTTCGACTACTACTACTActagtacTAACGGTGCAACGTTGTTATA
|
71
|
+
AGTTTGCCAAAGGTGAAACTTTAGCCTTAGGACtGTGTTTATTTTATTTGCAGTCGCATT
|
72
|
+
CgCCTAACTGTTTTCTGTTACTGGGTGCATTTAACTCACATTAATAGAGGATTTTtGACT
|
73
|
+
AGTtCcTAGAGAGTGGTGTTTCTGTTTTACCACCATGGCAAAAAAGGGAAaGCCTCGCCC
|
74
|
+
TGACCATAGGCCTCCTGCACACAACCCGCATTATGCTCATGATCCACCACCTTATTCACA
|
75
|
+
ACAGCAACCACCACTTCAACAGCAGAACTATGCACAACAAATGCATCATGGTGGAGGTGG
|
76
|
+
TGGAAATAGACAACATGCACGACcTAGACCTAGTCCACCTTCAGAAGTCAGTGACTGTGT
|
77
|
+
CAAGTACTCCCTTTTCTtGTATAACTGCATCTTTTGgaTTGtCGGCCTTttCTTTATtGC
|
78
|
+
AGCAGGTATCTGGgCATTTCACGATAGGGGTGTTTTTAATGAATTCCAGTCACTTAGTAC
|
79
|
+
CAATGAGGTCTCCTTTCTCACTGATCCTGTTATTTGGCTGTTCGTCCTCGGAGGTGTAGT
|
80
|
+
TTTCATGCTGGGAACCCTCGGATGTCTgGGGgCCCTCAGAGAAAaTATCTGCATGCTGAA
|
81
|
+
GTGTTTTAGCATAATCATGGGGCTTATACTGCTGCTGGAAATTGGAGGTGGATGTGCGAT
|
82
|
+
ATACTTCTATCGTGCACAGATTCAGGCACAGTTTCAAAAGTCCTTAACAGATGTGaCCAT
|
83
|
+
AACAGATTACAGAGAAAATGCTGATTTCCAGGATCTCATAGACGCATTACAATCCGGTCT
|
84
|
+
TTCTTGTTGTGGTGTCAATTCCTatGAAGACTGGGATAATAATATTTATTTCAACTGTAG
|
85
|
+
TGGTCCTGCCAATAACCCTGAAGCcttGTGGTGTGCCTTtCTCCTGTTGTATACCGGATC
|
86
|
+
AAGCAAGCGGAGTAGCCAACACCCAGTGCGGTTATGGAGTTCGTTCCCCCGAACAACAAA
|
87
|
+
ATACTTTCCACACAAAGATTTACACCACTGGCTGTGCGGATATGTTTACAATGTGGATTA
|
88
|
+
ATAGGTACCTATATTACATAGCAGGCATTGCTGGGGTCATTGTCTTGGTCGAGTtGTTTG
|
89
|
+
GATTCTGTTTTGCACATTCCCTCATCAACGACATCAAACGCCAAAAGGCCCGCTGGGCGC
|
90
|
+
ATCGATAATTCATTCCAGGATGTTGGTGgATGATGCTACTCAAGGGagAAGACTGACAGT
|
91
|
+
GCCTTTtGGTCAaTATCGTGTAGCATCAGGAAGGAGGTAGTACCTCCTCAACTAACCaTA
|
92
|
+
ACAGAATTTGTCCAGTTTGTAACATCGTCAAGAAATAAACAGACTTTTTTTACCATTAGG
|
93
|
+
ACgTGATAATACTACCACGTAACCTCTCAAAGCACAAAAAGCAAAAAGCAAATATCTCCT
|
94
|
+
TGTTTTAAAATTAGaagGTCTATCTCAGATAACAACCACAGAACATgTGGAGTTTTCCtT
|
95
|
+
TATGCTATCATAAAGATATAAATATATATAAAATTGAGGTAGcATCtTGGCTACCCACCA
|
96
|
+
AAATCATTTTTTTTCCAGTTTGaAACATCATGGAACATTTCAGAACAAAGATCATTTCAG
|
97
|
+
TCGTTACCACACTCAAGAgaTTGCTGTcGTCAaCaTTTtGtaGCTTTTtAAtGTCTTGAT
|
98
|
+
CTTCGTCGACATCGTCAATGTGTAAACTATTCTCGACGAGAGATTAGTGTCTAATACTGC
|
99
|
+
GGGTgATTTGATATAAATCTCACTTGG
|
100
|
+
>isotig00010 gene=isogroup00003 length=1650 numContigs=5
|
101
|
+
TGAATGAGAAAtGAAATTTAGCGAAGAAATCACCTTGTAAATTAAAAACTAAAATGGCTT
|
102
|
+
TCACACAAATTAaCAGTAAAtGgAGAATGTTTTTAAAGCAATATATGCAGTACAGCcATT
|
103
|
+
CATTGGAAAACAGTAAcAAAaTACATTTATCTTGTtcATTTTtACctCctGCAAaacTTA
|
104
|
+
cAaCcGTTAATTATGTAGATTGGATGGCACTAACAGGGTACTTGTCTTATCTGCCTATTG
|
105
|
+
GATAATGTGGcATTAATACTACTGTGTATGGGCACTGAGGCTGAGAGTGCAGTAAGTTtA
|
106
|
+
AAGGCATTGAAGACTCtCCCCGAaCcGCGtGCCGGGCTctGAAAAAGTtAaCTGCTCGCA
|
107
|
+
AaTtAcGTTTtCTtCTTGTCaCTaCAAAaTGCAGACATTaaTGAAACGTGATACCTTGTt
|
108
|
+
ATCTTTTATCTAGACCTGAGATGTCcAtCGCTGCTATgTACAcTGTGTTGTGGGTATTGA
|
109
|
+
CcgTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGT
|
110
|
+
GTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTA
|
111
|
+
GGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaa
|
112
|
+
gtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAA
|
113
|
+
AAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtac
|
114
|
+
TGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACA
|
115
|
+
AACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTA
|
116
|
+
ATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCC
|
117
|
+
AGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAG
|
118
|
+
ATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCT
|
119
|
+
cCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAA
|
120
|
+
GATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGA
|
121
|
+
TACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGC
|
122
|
+
AGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAG
|
123
|
+
AGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCA
|
124
|
+
AGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCC
|
125
|
+
ACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGG
|
126
|
+
TTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTT
|
127
|
+
TGTTCCTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAG
|
128
|
+
CACAGTGTGCAAAGctGCTATATATTGTCC
|
129
|
+
>isotig00011 gene=isogroup00003 length=1525 numContigs=6
|
130
|
+
ACATTCTTCAAGAGCTCTGCACCCACCAATCTAAAGTGACCAGCCAAGTGACTGACCTCA
|
131
|
+
GGGCACAGTTAGCAGCTTTGACCACAGGATGAGCTATGTAACAACTGAAtgaaTGGTGTT
|
132
|
+
CAtcGTTGATTGGGCAgTCAAAACAGCTGAATTTCTCTTGCGgAAGACATAAAGGCATTG
|
133
|
+
AAGACtcGCCcAAaccGtGTGcgcccTCTGAAAAaGTTAACTTTctGTTgCTTGCAaGTG
|
134
|
+
AAGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgAAACGTGATACcTtGTtATCT
|
135
|
+
TTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTGTGTTGTGGgTATTGACcGTA
|
136
|
+
GCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGTGTGTA
|
137
|
+
TTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTAGGTCA
|
138
|
+
GAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaagtatt
|
139
|
+
ttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAAAAtTT
|
140
|
+
TGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtacTGCCC
|
141
|
+
TTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACAAACAA
|
142
|
+
ATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTAATAAG
|
143
|
+
TTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCCAGACT
|
144
|
+
ATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAGATGTA
|
145
|
+
CCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCTcCTGA
|
146
|
+
GCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAAGATCT
|
147
|
+
TCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGATACCA
|
148
|
+
TTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGCAGTGC
|
149
|
+
CTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAGAGCTC
|
150
|
+
TGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCAAGTTG
|
151
|
+
AACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCCACATC
|
152
|
+
ACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGGTTTTT
|
153
|
+
TAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTTTGTTC
|
154
|
+
CTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAGCACAG
|
155
|
+
TGTGCAAAGctGCTATATATTGTCC
|
156
|
+
>isotig00001_f6
|
157
|
+
FRIYKNYALQYVSKDAHLSLLWSAVTHLGTQYFGKTPFVLTYILLTECAVESCMEACIETLINNYEKDKLLPLQYYTSYFLCRHLAKCDYSKTDKILNVCQRNLMAFLLKINPFHLLRHRLAPNRMQPYEKVNFLLLASDVLFVSLQNADSNETRYIVIFSWTDVHRCTVCTLCCGCLQLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITAPFFLRESSHRIMIIGFSNLEPLTKCVTELIFLVTKKTLVHMRERYINMSIRGRVVYLLPDSADNSQLFENTLCYISSSFKAPKPNQQNKKPNEQRNCMTNVNFSKIELLKHISQHISHRANRTELELTVHCTFHFFLYLKVTTKLHRKTKTSSLHFNHKNIFALITKKIRYTIRQTLPYLKPLRLCSTLSVLNCLYSTVLTRQISIIKISKIYYIHNLKDVECLSGVKAGAFQSGPLFESLQDCMSSSYRVVENRQFIIMDVKYETKRLTSVSLRLANEASSRVPACRMTNTYIYITVMRNAYLPKRCSYTRKGSTHSHLLKVDVVVHVVYFHHLHHDAFVVHSSAVEVVVAVVNKVVDHEHNAGCVQEAYGQGEAFPFLPWWNRNTTLELVKNPLLMVKCTQQKTVRRMRLQIKTQSGSFTFGKLITTLHRYSKVYNCSLRIVLKSKGQSDPLDHS
|
158
|
+
>isotig00002_f1
|
159
|
+
MRNEIRRNHLVNKLKWLSHKLTVNGECFSNICSTAIHWKTVTKYIYLVHFYLLQNLQPLIMIGWHQGTCLICLLDNVALILLCMGTEAESAVSLKALKTLPEPRAGLKSLLANYVFFLSLQNADINETYLVIFYLDLRCPSLLCTLCCGYPLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITGIDVSLCRVFTPFGEVFYIPLIEKELCFNWHATNFANIRYNQTITRFITRLIYCPLFHISLSHQKVTAFNNYTNKFPWVLGIPAIFKFINVISSECGNVEEALGSVDKTARLSTRFHIHATPTASFTSLQMYHMSKKKSALVNVIKIWLPPELANTHELSSTEVSVKPLKDLLSFQENKDLKPNQRPHYHSTLIRRYHTHFILLLRCSAWTVRVTQWMFQLPGLSSATDNSSCLVVILSWPASTHFNTKQLLNSCLGTVKSTSPSLLGSSYADHDKRVQGFFNSPNRACIPVPLGSKDIQKLLYMSVFVCMHMCDFLALQNSSILISYVTWKPANQCLKDRLKRFLNEKLNDFLHFYTSLFRGQGQYEEVTINNYHYGHLLSAQTECHQPCAQLTELTSRHTMSVSMKP
|
160
|
+
>isotig00008_f3
|
161
|
+
VSWITLTLCAYLSVLSTEHRADDIIQFVIMQDSKWPLLIVTYSSYCPYWPYLNSDVSKCKKSLSFSFKNRFSLSFSNIDWQVSTLHSLSKWSCSVKQENHTYACIQTHSYITISEYLFSCPKVLECKLCWVNKTLIVLFYHGQRSWNQQWCGFDSTTRIQLLCVEVGSTCSRPQDDDTLGIICCRALTRQLLKHPLGDPHSLPGTASQKQNEVCVIPSSYESGMVSVWPLSIWLQVFVFLKAEKIFQWFHTDLCTQFMCICQLRRKPDFNYINQCLFFGHVVHLQGGEGSSWSCMDVESCSGSLIHRLSQSFSTLPHSELITLINLKIAGIPSTHQGNLFVLLKAVTFCDRLMNKGQITLTLLLFDCICLQILSCMLPIETFFLYWDIKYFSLKGCEDSAQRNVYAGNLTFRMRCNRSVRHHHSQKIHTQLATVGNTLVYSQYIQLRSIPTTQCTQRWTSQVIKDNKVSRFITVCILRQEENVICKQWKVNFFRAQHAGWGKSSKPLSHLAFGYPGQDDTSRSDLSPVETNAESSL
|
162
|
+
>isotig00010_f3
|
163
|
+
NEKNLAKKSPCKLKTKMAFTQINSKWRMFLKQYMQYSHSLENSNKIHLSCSFLPPAKLTTVNYVDWMALTGYLSYLPIGCGINTTVYGHGECSKFKGIEDSPRTACRALKKLTARKLRFLLVTTKCRHNVIPCYLLSRPEMSIAAMYTVLWVLTVAVCIDCTLVSNYRRQAVCVFSGINGGVHFCYTSFETRSDYRHRRFFVPSLHTLLRRSILYPINRERIMFQLVACNLKFCKHIQSNNNKVHYKVNLLPFISHSVTSEGHSFQLYKQIPLMGAWLDSCNFQVYQCNKFMWQCGRSFGIICGDCQTINKIPHPCNSNCFLHLLADVPHVQKKVSIGCNNLASSAGKYTIVKYRGQCETTERSSQLSRKQRLEAKLTEATLIPFHSHKMKVSHTLHFASAMQCLVDCEGHPMDVLTTAWFIELCNRFILMSSRHPVMALSKLNPLQHKAAIEFLSRYCQIHITIIAWFQLRPKSTMRVFFTQQSLHSSTFGQLISILFLVCLSKSDILSRFVCSQRAQCAKLLYIV
|
164
|
+
>isotig00012_f3
|
165
|
+
LKVEKLVTIFHSSIFGSYILIWVSPCTDEVYTVESLQLYIKGIEDSPKPCAPSEKVNFPLLASEVFFLSLQNADSNETYLVIFYLDLRCPRCYVHCVVGIDRSCMYLYTSVLPTVASCVCIFWDQWWCLTLLLHLIRNVRLPATFLCAESSHPFKEKYFISHRKNYVSIGSMQLKILQTLDTIKQQGSLQGSTALYFTLVCHIRRSQLSIIIQTNSLDGCLVRFLQFSSLSMVLNVAMWKKLWDNLWIRLPDYQQDSTSMQLQLLPSPPCRCTTCPKKSQHWLMLKSGFLLSWQIHMNCQVQRSVNHKIFSAFKKTKTSQIDRGHTDTIPLSDEGITHTSFCFCDAVPGRLGSPNGCFNNCLVYRALQQIIHPNVSSSCHGLEQVEPTSTQSSYILVVLSNPHHHHCLVPATLTMIKEYNEGFLIHPTELAFQYLWAADIHFVPRMPVKIHSESLRLFATSTVCKAAIYC
|
166
|
+
>isotig00015_f4
|
167
|
+
RRPYVARVKVINQWLSLELFLQIKVSTKKITVFEGGHRYERSTGQTRTILTAFHPISRTRIEPQTILHCGRTGHSSWDRGVEHTFTTMLPLYNTSFVSSMLPWCMEFCNQQKSLSSVSIATSIRAASMELQGVNVTFYCSVVRNHVCQHGVALKGCEDSRKKNVCRSDLVSNEVQKCTLPSIPENTNTACYRRLDTSVQSIHTATVNTHNTVYIAAMDISGLDKRQGITFHCLHFVVTRRKRNLRAVNFFRARHAVRGESSMPLNLLHSQPQCPYTVVLMPHYPIGRDKYPVSAIQSTLTVVSFAGGKNEQDKCILLLFSNEWLYCIYCFKNILHLLLICVKAILVFNLQGDFFAKFHFSF
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
|
3
|
+
require 'npsearch/arg_validator'
|
4
|
+
|
5
|
+
# Class to test how well the CLI arguments are validated.
|
6
|
+
class TestInputArgumentValidator < Minitest::Test
  # Directory holding the fixture files, relative to the project root.
  FIXTURES = 'test/files'.freeze

  def setup
    @c = NpSearch::ArgumentsValidators
    @opt = { num_threads: 1, min_orf_length: 30 }
  end

  # An existing file passes; a missing file makes the validator exit.
  def test_assert_file_present
    validate(:assert_file_present, 'existing file', fixture('genetic.fa'), 1)
    assert_raises(SystemExit) do
      validate(:assert_file_present, 'non-existing file',
               fixture('nope_dont_exist.fa'), 1)
    end
  end

  # A file with content passes; an empty file makes the validator exit.
  def test_assert_input_file_not_empty
    validate(:assert_input_file_not_empty, fixture('genetic.fa'))
    assert_raises(SystemExit) do
      validate(:assert_input_file_not_empty, fixture('empty_file.fa'))
    end
  end

  # A FASTA file passes; a non-FASTA file makes the validator exit.
  def test_assert_input_file_probably_fasta
    validate(:assert_input_file_probably_fasta, fixture('genetic.fa'))
    assert_raises(SystemExit) do
      validate(:assert_input_file_probably_fasta, fixture('not_fasta.fa'))
    end
  end

  # Purely genetic or purely protein input passes; mixed content exits.
  def test_assert_input_sequence
    %w(genetic.fa protein.fa).each do |name|
      validate(:assert_input_sequence, fixture(name))
    end
    assert_raises(SystemExit) do
      validate(:assert_input_sequence, fixture('mixed_content.fa'))
    end
  end

  # Positive thread counts are accepted; a negative count yields 1.
  def test_check_num_threads
    [1, 50, 300].each { |threads| validate(:check_num_threads, threads) }
    assert_equal(1, validate(:check_num_threads, -3))
  end

  private

  # Path of the fixture file called +name+.
  def fixture(name)
    File.join(FIXTURES, name)
  end

  # Calls the (possibly private) validator method +check+ with +args+.
  def validate(check, *args)
    @c.send(check, *args)
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'minitest/autorun'
|