npsearch 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,108 +1,110 @@
1
- require 'csv'
2
- require 'tempfile'
3
-
1
+ # Top level module / namespace.
4
2
  module NpSearch
5
3
  # A class to score the Sequences
6
4
  class ScoreSequence
7
5
  class << self
8
- DI_CLV = 'KR|RR|KK'
9
- MONO_NP_CLV_2 = '[KR]..R'
10
- MONO_NP_CLV_4 = '[KR]....R'
11
- MONO_NP_CLV_6 = '[KR]......R'
6
+ DI_CLV = 'KR|RR|KK'.freeze
7
+ MONO_NP_CLV_2 = '[KR]..R'.freeze
8
+ MONO_NP_CLV_4 = '[KR]....R'.freeze
9
+ MONO_NP_CLV_6 = '[KR]......R'.freeze
12
10
  NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
13
- "(#{MONO_NP_CLV_6})"
11
+ "(#{MONO_NP_CLV_6})".freeze
14
12
 
15
- def run(sequence)
16
- @sequence = sequence
17
- split_into_neuropeptides
18
- count_np_cleavage_sites
19
- count_c_terminal_glycines
20
- np_similarity
21
- acidic_spacers
13
+ def run(sequence, opt)
14
+ split_into_potential_neuropeptides(sequence)
15
+ count_np_cleavage_sites(sequence)
16
+ count_c_terminal_glycines(sequence)
17
+ np_similarity(sequence, opt[:temp_dir])
18
+ acidic_spacers(sequence)
22
19
  end
23
20
 
24
21
  private
25
22
 
26
- def split_into_neuropeptides
23
+ def split_into_potential_neuropeptides(sequence)
27
24
  potential_nps = []
28
- results = @sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
25
+ results = sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
29
26
  headers = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
30
27
  di_clv_end mono_2_clv_end mono_4_clv_end mono_6_clv_end)
31
28
  results.each { |e| potential_nps << Hash[headers.map(&:to_sym).zip(e)] }
32
- @sequence.potential_cleaved_nps = potential_nps
29
+ sequence.potential_cleaved_nps = potential_nps
33
30
  end
34
31
 
35
- def count_np_cleavage_sites
36
- @sequence.potential_cleaved_nps.each do |e|
37
- count_dibasic_np_clv(e[:di_clv_end])
38
- count_mono_basic_np_clv(e[:mono_2_clv_end], e[:mono_4_clv_end],
39
- e[:mono_6_clv_end])
32
+ def count_np_cleavage_sites(sequence)
33
+ return if sequence.potential_cleaved_nps.empty?
34
+ sequence.potential_cleaved_nps.each do |e|
35
+ count_dibasic_np_clv(sequence, e[:di_clv_end])
36
+ count_mono_basic_np_clv(sequence, e[:mono_2_clv_end],
37
+ e[:mono_4_clv_end], e[:mono_6_clv_end])
40
38
  end
41
39
  end
42
40
 
43
- def count_dibasic_np_clv(dibasic_clv)
41
+ def count_dibasic_np_clv(sequence, dibasic_clv)
44
42
  case dibasic_clv
45
43
  when 'KR'
46
- @sequence.score += 0.09
44
+ sequence.score += 0.09
47
45
  when 'RR', 'KK'
48
- @sequence.score += 0.05
46
+ sequence.score += 0.05
49
47
  end
50
48
  end
51
49
 
52
- def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
50
+ def count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv)
53
51
  return if mono_2_clv.nil? && mono_4_clv.nil? && mono_6_clv.nil?
54
- @sequence.score += 0.02
52
+ sequence.score += 0.02
55
53
  end
56
54
 
57
55
  # Counts the number of C-terminal glycines
58
- def count_c_terminal_glycines
59
- @sequence.potential_cleaved_nps.each do |e|
60
- if e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
61
- @sequence.score += 0.25
56
+ def count_c_terminal_glycines(sequence)
57
+ return if sequence.potential_cleaved_nps.empty?
58
+ sequence.potential_cleaved_nps.each do |e|
59
+ if e[:np] =~ /FG$/ && e[:di_clv_end] == 'KR'
60
+ sequence.score += 0.40
61
+ elsif e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
62
+ sequence.score += 0.25
62
63
  elsif e[:np] =~ /G$|GK$|GR$/
63
- @sequence.score += 0.10
64
+ sequence.score += 0.10
64
65
  end
65
66
  end
66
67
  end
67
68
 
68
- def acidic_spacers
69
- @sequence.potential_cleaved_nps.each do |e|
70
- acidic_residue = e[:np].count('DE')
71
- percentage_acidic = acidic_residue / e[:np].length
72
- @sequence.score += 0.10 if percentage_acidic > 0.5
69
+ # Adds 0.10 if the acidic spacer is detected.
70
+ # Acidic Spacer is defined as being less than 25% of the precursor length
71
+ # (not including the Signalp) && having more than 50% D and E amino acids.
72
+ def acidic_spacers(sequence)
73
+ sequence.potential_cleaved_nps.each do |e|
74
+ next if e[:np].length / sequence.seq.length > 0.25
75
+ sequence.score += 0.10 if e[:np].count('DE') / e[:np].length > 0.5
73
76
  end
74
77
  end
75
78
 
76
- def np_similarity
77
- results = run_uclust
78
- results.gsub!(/^[^C].*\n/, '')
79
- results.each_line do |c|
80
- cluster = c.split(/\t/)
81
- no_of_seq_in_cluster = cluster[3].to_i
82
- if no_of_seq_in_cluster > 1
83
- @sequence.score += (0.15 * no_of_seq_in_cluster)
79
+ def np_similarity(sequence, temp_dir, results = nil)
80
+ results = run_cdhit(sequence, temp_dir) if results.nil?
81
+ clusters = results.split(/^>Cluster \d+\n/)
82
+ clusters.each do |c|
83
+ next if c.nil?
84
+ no_of_seqs_in_cluster = c.split("\n").length
85
+ if no_of_seqs_in_cluster > 1
86
+ sequence.score += (0.15 * no_of_seqs_in_cluster)
84
87
  end
85
88
  end
86
89
  end
87
90
 
88
- def run_uclust
89
- f = Tempfile.new('uclust')
90
- fo = Tempfile.new('uclust_out')
91
- write_sequence_content_to_tempfile(f)
92
- `usearch -cluster_fast #{f.path} -id 0.5 -uc #{fo.path} >/dev/null 2>&1`
93
- IO.read(fo.path)
94
- ensure
95
- f.unlink
96
- fo.unlink
91
+ def run_cdhit(sequence, temp_dir)
92
+ f = Tempfile.new('clust', temp_dir)
93
+ fo = Tempfile.new('clust_out', temp_dir)
94
+ return unless write_potential_peptides_to_tempfile(sequence, f)
95
+ `cd-hit -c 0.5 -n 3 -l 4 -i #{f.path} -o #{fo.path}`
96
+ IO.read("#{fo.path}.clstr")
97
97
  end
98
98
 
99
- def write_sequence_content_to_tempfile(tempfile)
100
- content = ''
101
- @sequence.potential_cleaved_nps.each_with_index do |e, i|
102
- content += ">seq#{i}\n#{e[:np]}\n"
99
+ def write_potential_peptides_to_tempfile(sequence, tempfile)
100
+ return false if sequence.potential_cleaved_nps.empty?
101
+ sequences = ''
102
+ sequence.potential_cleaved_nps.each_with_index do |e, i|
103
+ sequences += ">seq#{i}\n#{e[:np]}\n"
103
104
  end
104
- tempfile.write(content)
105
+ tempfile.write(sequences)
105
106
  tempfile.close
107
+ true
106
108
  end
107
109
  end
108
110
  end
@@ -1,23 +1,26 @@
1
+ # Top level module / namespace.
1
2
  module NpSearch
3
+ # Adapted from GeneValidator's Query Class.
2
4
  # A class to hold sequence data
3
5
  class Sequence
4
- DI_NP_CLV = 'KR|KK|RR'
5
- MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'
6
+ DI_NP_CLV = 'KR|KK|RR'.freeze
7
+ MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'.freeze
6
8
 
7
9
  attr_reader :id
10
+ attr_reader :defline
8
11
  attr_reader :signalp
9
12
  attr_reader :seq
10
13
  attr_reader :html_seq
11
- attr_reader :signalp_output
12
14
  attr_reader :translated_frame
13
15
  attr_accessor :score
14
16
  attr_accessor :potential_cleaved_nps
15
17
 
16
- def initialize(id, seq, signalp_output, frame = nil)
17
- @id = id
18
- sp_cleavage_site_idx = signalp_output[:ymax_pos].to_i - 1
19
- @signalp = seq[0..(sp_cleavage_site_idx - 1)]
20
- @seq = seq[sp_cleavage_site_idx..-1]
18
+ def initialize(entry, sp, frame = nil)
19
+ @id = entry.entry_id
20
+ @defline = entry.definition
21
+ sp_cleavage_site_idx = sp[:ymax_pos].to_i - 1
22
+ @signalp = sp[:orf][0..(sp_cleavage_site_idx - 1)]
23
+ @seq = sp[:orf][sp_cleavage_site_idx..-1]
21
24
  @html_seq = format_seq_for_html
22
25
  @translated_frame = frame
23
26
  @score = 0
@@ -27,7 +30,7 @@ module NpSearch
27
30
  def format_seq_for_html
28
31
  seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
29
32
  seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
30
- seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target R >>
33
+ seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target 'R'
31
34
  seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
32
35
  seq.gsub!('G<span class=np_clv>',
33
36
  '<span class=glycine>G</span><span class=np_clv>')
@@ -1,6 +1,6 @@
1
1
  require 'forwardable'
2
- require 'tempfile'
3
2
 
3
+ # Top level module / namespace.
4
4
  module NpSearch
5
5
  # A class to hold sequence data
6
6
  class Signalp
@@ -10,15 +10,34 @@ module NpSearch
10
10
 
11
11
  def analyse_sequence(seq)
12
12
  sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
13
- sp dmaxcut networks)
14
- f = Tempfile.new('signalp')
15
- f.write(">seq\n#{seq}")
16
- f.close
17
- s = `#{opt[:signalp_path]} -t euk -f short -U 0.3 -u 0.3 '#{f.path}' | \
18
- sed -n '3 p'`
19
- Hash[sp_headers.map(&:to_sym).zip(s.split)]
20
- ensure
21
- f.unlink
13
+ sp dmaxcut networks orf)
14
+ data = setup_analysis(seq)
15
+ orf_results = []
16
+ s = `echo "#{data[:fasta]}\n" | #{opt[:signalp_path]} -t euk \
17
+ -f short -U 0.34 -u 0.34`
18
+ sp_results = s.split("\n").delete_if { |l| l[0] == '#' }
19
+ sp_results.each_with_index do |line, idx|
20
+ line = line + ' ' + data[:seq][idx].to_s
21
+ orf_results << Hash[sp_headers.map(&:to_sym).zip(line.split)]
22
+ end
23
+ orf_results.sort_by { |h| h[:d] }.reverse[0]
24
+ end
25
+
26
+ def setup_analysis(seq)
27
+ if opt[:type] == :protein
28
+ data = { seq: [seq], fasta: ">seq\n#{seq}" }
29
+ else
30
+ orfs = seq.scan(/(?=(M\w+))./).flatten
31
+ orfs.unshift(seq)
32
+ data = { seq: orfs, fasta: create_orf_fasta(orfs) }
33
+ end
34
+ data
35
+ end
36
+
37
+ def create_orf_fasta(m_orf)
38
+ fasta = ''
39
+ m_orf.each_with_index { |seq, idx| fasta << ">#{idx}\n#{seq}\n" }
40
+ fasta
22
41
  end
23
42
  end
24
43
  end
@@ -1,4 +1,4 @@
1
1
  # Top level module / namespace.
2
2
  module NpSearch
3
- VERSION = '2.0.1'
3
+ VERSION = '2.1.0'.freeze
4
4
  end
data/lib/npsearch.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'bio'
2
2
  require 'fileutils'
3
3
 
4
- # require 'npsearch/arg_validator'
4
+ require 'npsearch/arg_validator'
5
5
  require 'npsearch/output'
6
6
  require 'npsearch/pool'
7
7
  require 'npsearch/scoresequence'
@@ -11,33 +11,39 @@ require 'npsearch/signalp'
11
11
  # Top level module / namespace.
12
12
  module NpSearch
13
13
  class <<self
14
- MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
15
-
16
14
  attr_accessor :opt
17
15
  attr_accessor :sequences
18
16
  attr_reader :sorted_sequences
19
17
 
20
18
  def init(opt)
21
- # @opt = args_validation(opt)
22
- @opt = opt
23
- @sequences = []
19
+ @opt = ArgumentsValidators.run(opt)
20
+ @sequences = []
24
21
  @sorted_sequences = nil
25
- @opt[:type] = guess_sequence_type
26
- @pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
22
+ @pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
23
+ FileUtils.mkdir_p(@opt[:temp_dir])
24
+ extract_orf if @opt[:type] == :genetic
27
25
  end
28
26
 
29
27
  def run
30
- iterate_input_file
28
+ input_file = @opt[:type] == :protein ? @opt[:input_file] : @opt[:orf]
29
+ iterate_input_file(input_file)
31
30
  @sorted_sequences = @sequences.sort_by(&:score).reverse
32
31
  Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
33
32
  Output.to_html(@opt[:input_file])
33
+ remove_temp_dir
34
34
  end
35
35
 
36
36
  private
37
37
 
38
- def iterate_input_file
39
- biofastafile = Bio::FlatFile.open(Bio::FastaFormat, @opt[:input_file])
40
- biofastafile.each_entry do |entry|
38
+ # Uses getorf from the EMBOSS package to extract all ORFs
39
+ def extract_orf(input = @opt[:input_file], minsize = 90)
40
+ @opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
41
+ system "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
42
+ " -minsize #{minsize} >/dev/null 2>&1"
43
+ end
44
+
45
+ def iterate_input_file(input_file)
46
+ Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
41
47
  if @opt[:num_threads] > 1
42
48
  @pool.schedule(entry) { |e| initialise_seqs(e) }
43
49
  else
@@ -48,49 +54,18 @@ module NpSearch
48
54
  end
49
55
 
50
56
  def initialise_seqs(entry)
51
- if @opt[:type] == :protein
52
- initialise_protein_seq(entry.entry_id, entry.aaseq)
53
- else
54
- initialise_transcriptomic_seq(entry.entry_id, entry.naseq)
55
- end
56
- end
57
-
58
- def initialise_protein_seq(id, seq)
59
- sp = Signalp.analyse_sequence(seq)
60
- return unless sp[:sp] == 'Y'
61
- seq = Sequence.new(id, seq, sp)
62
- ScoreSequence.run(seq)
57
+ return if entry.aaseq.length > @opt[:max_seq_length]
58
+ sp = Signalp.analyse_sequence(entry.aaseq)
59
+ return if sp[:sp] == 'N'
60
+ # seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
61
+ seq = Sequence.new(entry, sp)
62
+ ScoreSequence.run(seq, @opt)
63
63
  @sequences << seq
64
64
  end
65
65
 
66
- def initialise_transcriptomic_seq(id, naseq)
67
- (1..6).each do |f|
68
- translated_seq = naseq.translate(f)
69
- orfs = translated_seq.to_s.scan(/(?=(M\w{#{MIN_ORF_SIZE},}))./).flatten
70
- initialise_orfs(id, orfs, f)
71
- end
72
- end
73
-
74
- def initialise_orfs(id, orfs, frame)
75
- orfs.each do |orf|
76
- sp = Signalp.analyse_sequence(orf)
77
- next if sp[:sp] == 'N'
78
- seq = Sequence.new(id, orf, sp, frame)
79
- ScoreSequence.run(seq)
80
- @sequences << seq
81
- # The remaining ORF in this frame are simply shorter versions of the
82
- # same orf so break loop once signal peptide is found.
83
- break if sp[:sp] == 'Y'
84
- end
85
- end
86
-
87
- def guess_sequence_type
88
- fasta_content = IO.binread(@opt[:input_file])
89
- # removing non-letter and ambiguous characters
90
- cleaned_sequence = fasta_content.gsub(/[^A-Z]|[NX]/i, '')
91
- return nil if cleaned_sequence.length < 10 # conservative
92
- type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
93
- (type == Bio::Sequence::NA) ? :nucleotide : :protein
66
+ def remove_temp_dir
67
+ return unless File.directory?(@opt[:temp_dir])
68
+ FileUtils.rm_rf(@opt[:temp_dir])
94
69
  end
95
70
  end
96
71
  end
data/npsearch.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
16
16
  ' For more information: https://github.com/wurmlab/npsearch'
17
17
  s.summary = 'Search for neuropeptides based on the common' \
18
18
  ' neuropeptides markers'
19
- s.homepage = 'https://github.com/IsmailM/NeuroPeptideSearch'
19
+ s.homepage = 'https://github.com/wurmlab/npsearch'
20
20
  s.license = 'AGPL'
21
21
 
22
22
  s.files = `git ls-files -z`.split("\x0")
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
28
28
  s.add_development_dependency 'bundler', '~> 1.6'
29
29
  s.add_development_dependency 'rake', '~>10.3'
30
30
  s.add_development_dependency 'coveralls'
31
+ s.add_development_dependency 'minitest', '~> 5.4'
31
32
 
32
33
  s.add_dependency 'bio', '~> 1.4'
33
34
  s.add_dependency 'slim', '~> 3.0'
@@ -29,9 +29,9 @@ html lang="en"
29
29
  p.sequence
30
30
  span.id
31
31
  - if @opt[:type] == :protein
32
- | >#{seq.id}
32
+ | >#{seq.defline}
33
33
  - elsif @opt[:type] == :nucleotide
34
- | >#{seq.id}-(frame:#{seq.translated_frame})
34
+ | >#{seq.defline}-(frame:#{seq.translated_frame})
35
35
  br
36
36
  span.seq== seq.html_seq
37
37
  br
@@ -50,5 +50,5 @@ html lang="en"
50
50
  a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
51
51
  br
52
52
  | This page was created by
53
- a href="https://github.com/wurmlab/NpSearch" target="_blank" NpSearch
53
+ a href="https://github.com/wurmlab/npsearch" target="_blank" NpSearch
54
54
  | v#{NpSearch::VERSION}
@@ -0,0 +1,167 @@
1
+ >isotig00006 gene=isogroup00003 length=1747 numContigs=6
2
+ AGTTAAAAGTTGAAAAATTGGTGACCATATTTTGACACTCTAGCATATTTGGGAGCTATA
3
+ TACTGATTTGGGTTTCACCATGCACAGATGAGGTATATACATAAGTTGAAAGCCTGCAGC
4
+ TCTATATTAAAGGCATTGAAGACtcGCCcAAaccgtgTGcgcccTCTGAAAAaGTTAACT
5
+ TTCcGTTgCTTGCAaGTGAAGTTTtcTtCTTGTCGCTACAAAATGCAGACAGTAATGAAA
6
+
7
+ >isotig00007 gene=isogroup00003 length=1749 numContigs=5
8
+ TGTGTGTGTGTGGTGCTTCCccTCTAGGGCTGTAAATTTCAAAGGAACCTTGCGCAAGAA
9
+ CAGtAGCTTGCGaCGTTTTTCAAaaCCAGAGGTTCTGAACTGAACTGTACTGACTACTGT
10
+ AGGGtacTTAAaGGCATTGAAGACTCGCCcAAaCCatgTGCCGCGctttGAAAAAGTTAA
11
+ CTTTCCGTTGCTTGCAAATGAcGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgA
12
+ AACGTGATACcTtGTtATCTTTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTG
13
+ TGTTGTGGgTATTGACcGTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGAC
14
+ GGTAGCAAGCTGTGTGTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACA
15
+ CCtCAtTcGAAACTAGGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACA
16
+ cccttttaaggagaagtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGG
17
+ TAGCATGCAACTTAAAAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATT
18
+ ACAAGGttAAtCtacTGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCT
19
+ TtCAaTAaTTATACAAACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTC
20
+ AAGTTTATCAATGTAATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCT
21
+ GTGGATAAGACTGCCAGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCC
22
+ TTCACCTCCTTGCAGATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATT
23
+ AAAATCTGGCTTCCTcCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGT
24
+ GTGAAACCACTGAAAGATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGA
25
+ CAGAGGCCACACTGATACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATT
26
+ TTGCTTCTGCGATGCAGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAa
27
+ CTGCCtGGTTtAtAGAGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTG
28
+ TCATGGCCTTGAGCAAGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTA
29
+ GGTACTGTcAAATCCACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAA
30
+ AGAGTACAATGAGGGTTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGC
31
+ AGCTGaaaaGATATTCAGAAaTTGTTATATATGAGTGTGTTTGTATGCATGCAtATGtGT
32
+ GATTTtCTtGCTTTACAGAACAGCTCCaTTTTGATAAGCTAtgTAAcgtGgAAACCTGCC
33
+ AATCAaTGTTtgAAataGGAcaGgCTGAAACGATTCTTAAATGAAAAGCTTAAtgaCTTc
34
+ TTgCAtttttaTACATCACTGTTCAGGtAaGGCCAGTAAGGgCAGTATgAaGAAtAaGTA
35
+ ACAATtAATAATTATCATTATGGCCATTTGCTGtcTGCATAAtAaCAAACTGAATGATGT
36
+ CATCAGCCCTgTGCTCAGTTGACAgAACTGACAAGTAGGCACACaaTGTCAGTGTGATCC
37
+ ATGAAACCT
38
+ >isotig00008 gene=isogroup00003 length=1726 numContigs=6
39
+ AGGTTTCATGGATCACACTGACAtTGTGTGCCTACTTGTCAGTTcTGTCAACTGAGCAcA
40
+ GGGCTGATGACATCATTCAGTTTGttattATGCAggaCAGCAAATGGCCATAATGATAAT
41
+ TATTAaTTGTTACTtaTTCTtcATACTGCCcTTACTGGCCTtaCCTGAACAGTGATGTAt
42
+ caaaaTGcAAgAAGtcaTTAAGCTTTTCATTTAAGAATCGTTTCAGCctgTCCtaatTTt
43
+ cAAaCAtTGATTGGCAGGTTTCcacgTTAcaTAGCTTATCAAAAtGGAGCTGTTCTGTAA
44
+ AGCAAGaAAATCACaCATaTGCATGCATACAAACACACTCATATATAACAAtTTCTGAAT
45
+ ATCTtttCAGCTGCCCAAAGGTACTGGAATGCAAGCTCTGTTGGGTGAATTAAAAAaCCc
46
+ TCATTGTACTCTTTTATCATGGTCAGCGTAGCTGGAACCAGCAATGATGGTGATGTGGAT
47
+ TTGACAGTACCTAGACAAGAATTCAATAGCTGCTTTGTGTTGAAGTGGGTTCAACTTGCT
48
+ CAAGGCCATGACAGGATGACGACTAGACATtAGGATGAATTATCTGTTGCAGAGCTCTAT
49
+ AAaCCAGGCAGTtGTtAAAaCATCCATTGGGTGACCcTCACAGTCTACCAGGCACTGCAT
50
+ CGCAGAAGCAAAATGAAGTGTGTGTgATACCTTCATCTTATGAGAGTGGAATGGTATCAG
51
+ TGTGGCCTCTGTCAATTTGGCTTCAAGTCTTTGTTTTCTTGAAAGCTGAGAaGATCTTTC
52
+ AGTGGTTTCACACTGACCTCTGTACTTGACAATTCATGTGTATTTGCCAGCTCAGgAGGA
53
+ AGCCAGATTTTAATTACATTAACCAATGCTGACTTTTTTttGGACATGTGGTACATCTGC
54
+ AAGGAGGTGAAGGAAGCAGTTGGAGTTGCATGGATGTGGAATCTTGTTGATAGTCTGGCA
55
+ GTCTTATCCACAGATTATCCCAAAGCTTCTCCACATTGCCACATTCAGAACTTATTACAT
56
+ TGATAAACTTGAAAATtGCAGGAATCTAaCcAaGCACCcATCAaGGGAaTTTGTTTGTAT
57
+ AATtATtGAAaGCTGTGACcTTCTGATGTGACAGACTAATGTGAAaTAAAGGgCAgtaGa
58
+ TTaCCTTGTaaTGAACCttGTTATTGTTTGATTGTATCTAAtGTTTGCAaaTTTTAAGTT
59
+ GCATGCTACCAATTGAAACATAATTCTTTCTCTAttaatgggatataaaatacttctcct
60
+ taaaagggTGTgAaGACTcggCACAAAGAAACGTCtaTGCcGgtAaTCTGACCTAGTTTc
61
+ gAatGaGGTGTAACagAAGTgTtAGACACcACCAttGATCCcAGAAAATACACACACAGC
62
+ TTGCTACCGTCGGTAaTTAGACACTAGTGTACAGTCAaTACATACAGCTAcGgTCAATAC
63
+ CCACAaCACAgTGTAcATAGCAGCGaTGgACATCTCAGGTCTAGATAAAAGATAaCAAGG
64
+ TATCACGTTTCATtaCTGTCTGCATTTtGTAGCgaCAagAAGAAAAcgtCATTtGCAAGC
65
+ AaTGgAAAGTtAACTTTTTCaGAGCGcagCAcGCgggTTGGGGCAAGTCTTCCAAGCCTT
66
+ TAAGTtGACAtcTTGCCTTTGGCTATCCAGGgTGACAAGATGATACTAGCAGGTAgagtg
67
+ actaattgagccctgtgtgagaaaccaatgcagaatctagcctagt
68
+ >isotig00009 gene=isogroup00003 length=1827 numContigs=2
69
+ TAGCTGTGATCTAGTGGATCTGACTGGCCTTTTGATTATTTCAGCacGATTCTCAGACTA
70
+ CAGTTGTAAaCCTACTTCGACTACTACTACTActagtacTAACGGTGCAACGTTGTTATA
71
+ AGTTTGCCAAAGGTGAAACTTTAGCCTTAGGACtGTGTTTATTTTATTTGCAGTCGCATT
72
+ CgCCTAACTGTTTTCTGTTACTGGGTGCATTTAACTCACATTAATAGAGGATTTTtGACT
73
+ AGTtCcTAGAGAGTGGTGTTTCTGTTTTACCACCATGGCAAAAAAGGGAAaGCCTCGCCC
74
+ TGACCATAGGCCTCCTGCACACAACCCGCATTATGCTCATGATCCACCACCTTATTCACA
75
+ ACAGCAACCACCACTTCAACAGCAGAACTATGCACAACAAATGCATCATGGTGGAGGTGG
76
+ TGGAAATAGACAACATGCACGACcTAGACCTAGTCCACCTTCAGAAGTCAGTGACTGTGT
77
+ CAAGTACTCCCTTTTCTtGTATAACTGCATCTTTTGgaTTGtCGGCCTTttCTTTATtGC
78
+ AGCAGGTATCTGGgCATTTCACGATAGGGGTGTTTTTAATGAATTCCAGTCACTTAGTAC
79
+ CAATGAGGTCTCCTTTCTCACTGATCCTGTTATTTGGCTGTTCGTCCTCGGAGGTGTAGT
80
+ TTTCATGCTGGGAACCCTCGGATGTCTgGGGgCCCTCAGAGAAAaTATCTGCATGCTGAA
81
+ GTGTTTTAGCATAATCATGGGGCTTATACTGCTGCTGGAAATTGGAGGTGGATGTGCGAT
82
+ ATACTTCTATCGTGCACAGATTCAGGCACAGTTTCAAAAGTCCTTAACAGATGTGaCCAT
83
+ AACAGATTACAGAGAAAATGCTGATTTCCAGGATCTCATAGACGCATTACAATCCGGTCT
84
+ TTCTTGTTGTGGTGTCAATTCCTatGAAGACTGGGATAATAATATTTATTTCAACTGTAG
85
+ TGGTCCTGCCAATAACCCTGAAGCcttGTGGTGTGCCTTtCTCCTGTTGTATACCGGATC
86
+ AAGCAAGCGGAGTAGCCAACACCCAGTGCGGTTATGGAGTTCGTTCCCCCGAACAACAAA
87
+ ATACTTTCCACACAAAGATTTACACCACTGGCTGTGCGGATATGTTTACAATGTGGATTA
88
+ ATAGGTACCTATATTACATAGCAGGCATTGCTGGGGTCATTGTCTTGGTCGAGTtGTTTG
89
+ GATTCTGTTTTGCACATTCCCTCATCAACGACATCAAACGCCAAAAGGCCCGCTGGGCGC
90
+ ATCGATAATTCATTCCAGGATGTTGGTGgATGATGCTACTCAAGGGagAAGACTGACAGT
91
+ GCCTTTtGGTCAaTATCGTGTAGCATCAGGAAGGAGGTAGTACCTCCTCAACTAACCaTA
92
+ ACAGAATTTGTCCAGTTTGTAACATCGTCAAGAAATAAACAGACTTTTTTTACCATTAGG
93
+ ACgTGATAATACTACCACGTAACCTCTCAAAGCACAAAAAGCAAAAAGCAAATATCTCCT
94
+ TGTTTTAAAATTAGaagGTCTATCTCAGATAACAACCACAGAACATgTGGAGTTTTCCtT
95
+ TATGCTATCATAAAGATATAAATATATATAAAATTGAGGTAGcATCtTGGCTACCCACCA
96
+ AAATCATTTTTTTTCCAGTTTGaAACATCATGGAACATTTCAGAACAAAGATCATTTCAG
97
+ TCGTTACCACACTCAAGAgaTTGCTGTcGTCAaCaTTTtGtaGCTTTTtAAtGTCTTGAT
98
+ CTTCGTCGACATCGTCAATGTGTAAACTATTCTCGACGAGAGATTAGTGTCTAATACTGC
99
+ GGGTgATTTGATATAAATCTCACTTGG
100
+ >isotig00010 gene=isogroup00003 length=1650 numContigs=5
101
+ TGAATGAGAAAtGAAATTTAGCGAAGAAATCACCTTGTAAATTAAAAACTAAAATGGCTT
102
+ TCACACAAATTAaCAGTAAAtGgAGAATGTTTTTAAAGCAATATATGCAGTACAGCcATT
103
+ CATTGGAAAACAGTAAcAAAaTACATTTATCTTGTtcATTTTtACctCctGCAAaacTTA
104
+ cAaCcGTTAATTATGTAGATTGGATGGCACTAACAGGGTACTTGTCTTATCTGCCTATTG
105
+ GATAATGTGGcATTAATACTACTGTGTATGGGCACTGAGGCTGAGAGTGCAGTAAGTTtA
106
+ AAGGCATTGAAGACTCtCCCCGAaCcGCGtGCCGGGCTctGAAAAAGTtAaCTGCTCGCA
107
+ AaTtAcGTTTtCTtCTTGTCaCTaCAAAaTGCAGACATTaaTGAAACGTGATACCTTGTt
108
+ ATCTTTTATCTAGACCTGAGATGTCcAtCGCTGCTATgTACAcTGTGTTGTGGGTATTGA
109
+ CcgTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGT
110
+ GTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTA
111
+ GGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaa
112
+ gtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAA
113
+ AAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtac
114
+ TGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACA
115
+ AACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTA
116
+ ATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCC
117
+ AGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAG
118
+ ATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCT
119
+ cCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAA
120
+ GATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGA
121
+ TACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGC
122
+ AGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAG
123
+ AGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCA
124
+ AGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCC
125
+ ACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGG
126
+ TTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTT
127
+ TGTTCCTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAG
128
+ CACAGTGTGCAAAGctGCTATATATTGTCC
129
+ >isotig00011 gene=isogroup00003 length=1525 numContigs=6
130
+ ACATTCTTCAAGAGCTCTGCACCCACCAATCTAAAGTGACCAGCCAAGTGACTGACCTCA
131
+ GGGCACAGTTAGCAGCTTTGACCACAGGATGAGCTATGTAACAACTGAAtgaaTGGTGTT
132
+ CAtcGTTGATTGGGCAgTCAAAACAGCTGAATTTCTCTTGCGgAAGACATAAAGGCATTG
133
+ AAGACtcGCCcAAaccGtGTGcgcccTCTGAAAAaGTTAACTTTctGTTgCTTGCAaGTG
134
+ AAGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgAAACGTGATACcTtGTtATCT
135
+ TTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTGTGTTGTGGgTATTGACcGTA
136
+ GCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGTGTGTA
137
+ TTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTAGGTCA
138
+ GAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaagtatt
139
+ ttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAAAAtTT
140
+ TGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtacTGCCC
141
+ TTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACAAACAA
142
+ ATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTAATAAG
143
+ TTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCCAGACT
144
+ ATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAGATGTA
145
+ CCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCTcCTGA
146
+ GCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAAGATCT
147
+ TCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGATACCA
148
+ TTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGCAGTGC
149
+ CTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAGAGCTC
150
+ TGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCAAGTTG
151
+ AACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCCACATC
152
+ ACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGGTTTTT
153
+ TAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTTTGTTC
154
+ CTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAGCACAG
155
+ TGTGCAAAGctGCTATATATTGTCC
156
+ >isotig00001_f6
157
+ FRIYKNYALQYVSKDAHLSLLWSAVTHLGTQYFGKTPFVLTYILLTECAVESCMEACIETLINNYEKDKLLPLQYYTSYFLCRHLAKCDYSKTDKILNVCQRNLMAFLLKINPFHLLRHRLAPNRMQPYEKVNFLLLASDVLFVSLQNADSNETRYIVIFSWTDVHRCTVCTLCCGCLQLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITAPFFLRESSHRIMIIGFSNLEPLTKCVTELIFLVTKKTLVHMRERYINMSIRGRVVYLLPDSADNSQLFENTLCYISSSFKAPKPNQQNKKPNEQRNCMTNVNFSKIELLKHISQHISHRANRTELELTVHCTFHFFLYLKVTTKLHRKTKTSSLHFNHKNIFALITKKIRYTIRQTLPYLKPLRLCSTLSVLNCLYSTVLTRQISIIKISKIYYIHNLKDVECLSGVKAGAFQSGPLFESLQDCMSSSYRVVENRQFIIMDVKYETKRLTSVSLRLANEASSRVPACRMTNTYIYITVMRNAYLPKRCSYTRKGSTHSHLLKVDVVVHVVYFHHLHHDAFVVHSSAVEVVVAVVNKVVDHEHNAGCVQEAYGQGEAFPFLPWWNRNTTLELVKNPLLMVKCTQQKTVRRMRLQIKTQSGSFTFGKLITTLHRYSKVYNCSLRIVLKSKGQSDPLDHS
158
+ >isotig00002_f1
159
+ MRNEIRRNHLVNKLKWLSHKLTVNGECFSNICSTAIHWKTVTKYIYLVHFYLLQNLQPLIMIGWHQGTCLICLLDNVALILLCMGTEAESAVSLKALKTLPEPRAGLKSLLANYVFFLSLQNADINETYLVIFYLDLRCPSLLCTLCCGYPLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITGIDVSLCRVFTPFGEVFYIPLIEKELCFNWHATNFANIRYNQTITRFITRLIYCPLFHISLSHQKVTAFNNYTNKFPWVLGIPAIFKFINVISSECGNVEEALGSVDKTARLSTRFHIHATPTASFTSLQMYHMSKKKSALVNVIKIWLPPELANTHELSSTEVSVKPLKDLLSFQENKDLKPNQRPHYHSTLIRRYHTHFILLLRCSAWTVRVTQWMFQLPGLSSATDNSSCLVVILSWPASTHFNTKQLLNSCLGTVKSTSPSLLGSSYADHDKRVQGFFNSPNRACIPVPLGSKDIQKLLYMSVFVCMHMCDFLALQNSSILISYVTWKPANQCLKDRLKRFLNEKLNDFLHFYTSLFRGQGQYEEVTINNYHYGHLLSAQTECHQPCAQLTELTSRHTMSVSMKP
160
+ >isotig00008_f3
161
+ VSWITLTLCAYLSVLSTEHRADDIIQFVIMQDSKWPLLIVTYSSYCPYWPYLNSDVSKCKKSLSFSFKNRFSLSFSNIDWQVSTLHSLSKWSCSVKQENHTYACIQTHSYITISEYLFSCPKVLECKLCWVNKTLIVLFYHGQRSWNQQWCGFDSTTRIQLLCVEVGSTCSRPQDDDTLGIICCRALTRQLLKHPLGDPHSLPGTASQKQNEVCVIPSSYESGMVSVWPLSIWLQVFVFLKAEKIFQWFHTDLCTQFMCICQLRRKPDFNYINQCLFFGHVVHLQGGEGSSWSCMDVESCSGSLIHRLSQSFSTLPHSELITLINLKIAGIPSTHQGNLFVLLKAVTFCDRLMNKGQITLTLLLFDCICLQILSCMLPIETFFLYWDIKYFSLKGCEDSAQRNVYAGNLTFRMRCNRSVRHHHSQKIHTQLATVGNTLVYSQYIQLRSIPTTQCTQRWTSQVIKDNKVSRFITVCILRQEENVICKQWKVNFFRAQHAGWGKSSKPLSHLAFGYPGQDDTSRSDLSPVETNAESSL
162
+ >isotig00010_f3
163
+ NEKNLAKKSPCKLKTKMAFTQINSKWRMFLKQYMQYSHSLENSNKIHLSCSFLPPAKLTTVNYVDWMALTGYLSYLPIGCGINTTVYGHGECSKFKGIEDSPRTACRALKKLTARKLRFLLVTTKCRHNVIPCYLLSRPEMSIAAMYTVLWVLTVAVCIDCTLVSNYRRQAVCVFSGINGGVHFCYTSFETRSDYRHRRFFVPSLHTLLRRSILYPINRERIMFQLVACNLKFCKHIQSNNNKVHYKVNLLPFISHSVTSEGHSFQLYKQIPLMGAWLDSCNFQVYQCNKFMWQCGRSFGIICGDCQTINKIPHPCNSNCFLHLLADVPHVQKKVSIGCNNLASSAGKYTIVKYRGQCETTERSSQLSRKQRLEAKLTEATLIPFHSHKMKVSHTLHFASAMQCLVDCEGHPMDVLTTAWFIELCNRFILMSSRHPVMALSKLNPLQHKAAIEFLSRYCQIHITIIAWFQLRPKSTMRVFFTQQSLHSSTFGQLISILFLVCLSKSDILSRFVCSQRAQCAKLLYIV
164
+ >isotig00012_f3
165
+ LKVEKLVTIFHSSIFGSYILIWVSPCTDEVYTVESLQLYIKGIEDSPKPCAPSEKVNFPLLASEVFFLSLQNADSNETYLVIFYLDLRCPRCYVHCVVGIDRSCMYLYTSVLPTVASCVCIFWDQWWCLTLLLHLIRNVRLPATFLCAESSHPFKEKYFISHRKNYVSIGSMQLKILQTLDTIKQQGSLQGSTALYFTLVCHIRRSQLSIIIQTNSLDGCLVRFLQFSSLSMVLNVAMWKKLWDNLWIRLPDYQQDSTSMQLQLLPSPPCRCTTCPKKSQHWLMLKSGFLLSWQIHMNCQVQRSVNHKIFSAFKKTKTSQIDRGHTDTIPLSDEGITHTSFCFCDAVPGRLGSPNGCFNNCLVYRALQQIIHPNVSSSCHGLEQVEPTSTQSSYILVVLSNPHHHHCLVPATLTMIKEYNEGFLIHPTELAFQYLWAADIHFVPRMPVKIHSESLRLFATSTVCKAAIYC
166
+ >isotig00015_f4
167
+ RRPYVARVKVINQWLSLELFLQIKVSTKKITVFEGGHRYERSTGQTRTILTAFHPISRTRIEPQTILHCGRTGHSSWDRGVEHTFTTMLPLYNTSFVSSMLPWCMEFCNQQKSLSSVSIATSIRAASMELQGVNVTFYCSVVRNHVCQHGVALKGCEDSRKKNVCRSDLVSNEVQKCTLPSIPENTNTACYRRLDTSVQSIHTATVNTHNTVYIAAMDISGLDKRQGITFHCLHFVVTRRKRNLRAVNFFRARHAVRGESSMPLNLLHSQPQCPYTVVLMPHYPIGRDKYPVSAIQSTLTVVSFAGGKNEQDKCILLLFSNEWLYCIYCFKNILHLLLICVKAILVFNLQGDFFAKFHFSF
@@ -0,0 +1,50 @@
1
+ require_relative 'test_helper'
2
+
3
+ require 'npsearch/arg_validator'
4
+
5
+ # Class to test how well the CLI arguments are validated.
6
+ class TestInputArgumentValidator < Minitest::Test
7
+ def setup
8
+ @c = NpSearch::ArgumentsValidators
9
+ @opt = { num_threads: 1, min_orf_length: 30 }
10
+ end
11
+
12
+ def test_assert_file_present
13
+ @c.send(:assert_file_present, 'existing file',
14
+ 'test/files/genetic.fa', 1)
15
+ assert_raises(SystemExit) do
16
+ @c.send(:assert_file_present, 'non-existing file',
17
+ 'test/files/nope_dont_exist.fa', 1)
18
+ end
19
+ end
20
+
21
+ def test_assert_input_file_not_empty
22
+ f = 'test/files/genetic.fa'
23
+ @c.send(:assert_input_file_not_empty, f)
24
+ f = 'test/files/empty_file.fa'
25
+ assert_raises(SystemExit) { @c.send(:assert_input_file_not_empty, f) }
26
+ end
27
+
28
+ def test_assert_input_file_probably_fasta
29
+ f = 'test/files/genetic.fa'
30
+ @c.send(:assert_input_file_probably_fasta, f)
31
+ f = 'test/files/not_fasta.fa'
32
+ assert_raises(SystemExit) { @c.send(:assert_input_file_probably_fasta, f) }
33
+ end
34
+
35
+ def test_assert_input_sequence
36
+ f = 'test/files/genetic.fa'
37
+ @c.send(:assert_input_sequence, f)
38
+ f = 'test/files/protein.fa'
39
+ @c.send(:assert_input_sequence, f)
40
+ f = 'test/files/mixed_content.fa'
41
+ assert_raises(SystemExit) { @c.send(:assert_input_sequence, f) }
42
+ end
43
+
44
+ def test_check_num_threads
45
+ [1, 50, 300].each do |t|
46
+ @c.send(:check_num_threads, t)
47
+ end
48
+ assert_equal(1, @c.send(:check_num_threads, -3))
49
+ end
50
+ end
@@ -0,0 +1 @@
1
+ require 'minitest/autorun'