npsearch 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,108 +1,110 @@
1
- require 'csv'
2
- require 'tempfile'
3
-
1
+ # Top level module / namespace.
4
2
  module NpSearch
5
3
  # A class to score the Sequences
6
4
  class ScoreSequence
7
5
  class << self
8
- DI_CLV = 'KR|RR|KK'
9
- MONO_NP_CLV_2 = '[KR]..R'
10
- MONO_NP_CLV_4 = '[KR]....R'
11
- MONO_NP_CLV_6 = '[KR]......R'
6
+ DI_CLV = 'KR|RR|KK'.freeze
7
+ MONO_NP_CLV_2 = '[KR]..R'.freeze
8
+ MONO_NP_CLV_4 = '[KR]....R'.freeze
9
+ MONO_NP_CLV_6 = '[KR]......R'.freeze
12
10
  NP_CLV = "(#{DI_CLV})|(#{MONO_NP_CLV_2})|(#{MONO_NP_CLV_4})|" \
13
- "(#{MONO_NP_CLV_6})"
11
+ "(#{MONO_NP_CLV_6})".freeze
14
12
 
15
- def run(sequence)
16
- @sequence = sequence
17
- split_into_neuropeptides
18
- count_np_cleavage_sites
19
- count_c_terminal_glycines
20
- np_similarity
21
- acidic_spacers
13
+ def run(sequence, opt)
14
+ split_into_potential_neuropeptides(sequence)
15
+ count_np_cleavage_sites(sequence)
16
+ count_c_terminal_glycines(sequence)
17
+ np_similarity(sequence, opt[:temp_dir])
18
+ acidic_spacers(sequence)
22
19
  end
23
20
 
24
21
  private
25
22
 
26
- def split_into_neuropeptides
23
+ def split_into_potential_neuropeptides(sequence)
27
24
  potential_nps = []
28
- results = @sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
25
+ results = sequence.seq.scan(/(?<=^|#{NP_CLV})(\w+?)(?=#{NP_CLV}|$)/i)
29
26
  headers = %w(di_clv_st mono_2_clv_st mono_4_clv_st mono_6_clv_st np
30
27
  di_clv_end mono_2_clv_end mono_4_clv_end mono_6_clv_end)
31
28
  results.each { |e| potential_nps << Hash[headers.map(&:to_sym).zip(e)] }
32
- @sequence.potential_cleaved_nps = potential_nps
29
+ sequence.potential_cleaved_nps = potential_nps
33
30
  end
34
31
 
35
- def count_np_cleavage_sites
36
- @sequence.potential_cleaved_nps.each do |e|
37
- count_dibasic_np_clv(e[:di_clv_end])
38
- count_mono_basic_np_clv(e[:mono_2_clv_end], e[:mono_4_clv_end],
39
- e[:mono_6_clv_end])
32
+ def count_np_cleavage_sites(sequence)
33
+ return if sequence.potential_cleaved_nps.empty?
34
+ sequence.potential_cleaved_nps.each do |e|
35
+ count_dibasic_np_clv(sequence, e[:di_clv_end])
36
+ count_mono_basic_np_clv(sequence, e[:mono_2_clv_end],
37
+ e[:mono_4_clv_end], e[:mono_6_clv_end])
40
38
  end
41
39
  end
42
40
 
43
- def count_dibasic_np_clv(dibasic_clv)
41
+ def count_dibasic_np_clv(sequence, dibasic_clv)
44
42
  case dibasic_clv
45
43
  when 'KR'
46
- @sequence.score += 0.09
44
+ sequence.score += 0.09
47
45
  when 'RR', 'KK'
48
- @sequence.score += 0.05
46
+ sequence.score += 0.05
49
47
  end
50
48
  end
51
49
 
52
- def count_mono_basic_np_clv(mono_2_clv, mono_4_clv, mono_6_clv)
50
+ def count_mono_basic_np_clv(sequence, mono_2_clv, mono_4_clv, mono_6_clv)
53
51
  return if mono_2_clv.nil? && mono_4_clv.nil? && mono_6_clv.nil?
54
- @sequence.score += 0.02
52
+ sequence.score += 0.02
55
53
  end
56
54
 
57
55
  # Counts the number of C-terminal glycines
58
- def count_c_terminal_glycines
59
- @sequence.potential_cleaved_nps.each do |e|
60
- if e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
61
- @sequence.score += 0.25
56
+ def count_c_terminal_glycines(sequence)
57
+ return if sequence.potential_cleaved_nps.empty?
58
+ sequence.potential_cleaved_nps.each do |e|
59
+ if e[:np] =~ /FG$/ && e[:di_clv_end] == 'KR'
60
+ sequence.score += 0.40
61
+ elsif e[:np] =~ /G$/ && e[:di_clv_end] == 'KR'
62
+ sequence.score += 0.25
62
63
  elsif e[:np] =~ /G$|GK$|GR$/
63
- @sequence.score += 0.10
64
+ sequence.score += 0.10
64
65
  end
65
66
  end
66
67
  end
67
68
 
68
- def acidic_spacers
69
- @sequence.potential_cleaved_nps.each do |e|
70
- acidic_residue = e[:np].count('DE')
71
- percentage_acidic = acidic_residue / e[:np].length
72
- @sequence.score += 0.10 if percentage_acidic > 0.5
69
+ # Adds 0.10 if the acidic spacer is detected.
70
+ # Acidic Spacer is defined as being less than 25% of the precursor length
71
+ # (not including the Signalp) and having more than 50% D and E amino acids.
72
+ def acidic_spacers(sequence)
73
+ sequence.potential_cleaved_nps.each do |e|
74
+ next if e[:np].length / sequence.seq.length > 0.25
75
+ sequence.score += 0.10 if e[:np].count('DE') / e[:np].length > 0.5
73
76
  end
74
77
  end
75
78
 
76
- def np_similarity
77
- results = run_uclust
78
- results.gsub!(/^[^C].*\n/, '')
79
- results.each_line do |c|
80
- cluster = c.split(/\t/)
81
- no_of_seq_in_cluster = cluster[3].to_i
82
- if no_of_seq_in_cluster > 1
83
- @sequence.score += (0.15 * no_of_seq_in_cluster)
79
+ def np_similarity(sequence, temp_dir, results = nil)
80
+ results = run_cdhit(sequence, temp_dir) if results.nil?
81
+ clusters = results.split(/^>Cluster \d+\n/)
82
+ clusters.each do |c|
83
+ next if c.nil?
84
+ no_of_seqs_in_cluster = c.split("\n").length
85
+ if no_of_seqs_in_cluster > 1
86
+ sequence.score += (0.15 * no_of_seqs_in_cluster)
84
87
  end
85
88
  end
86
89
  end
87
90
 
88
- def run_uclust
89
- f = Tempfile.new('uclust')
90
- fo = Tempfile.new('uclust_out')
91
- write_sequence_content_to_tempfile(f)
92
- `usearch -cluster_fast #{f.path} -id 0.5 -uc #{fo.path} >/dev/null 2>&1`
93
- IO.read(fo.path)
94
- ensure
95
- f.unlink
96
- fo.unlink
91
+ def run_cdhit(sequence, temp_dir)
92
+ f = Tempfile.new('clust', temp_dir)
93
+ fo = Tempfile.new('clust_out', temp_dir)
94
+ return unless write_potential_peptides_to_tempfile(sequence, f)
95
+ `cd-hit -c 0.5 -n 3 -l 4 -i #{f.path} -o #{fo.path}`
96
+ IO.read("#{fo.path}.clstr")
97
97
  end
98
98
 
99
- def write_sequence_content_to_tempfile(tempfile)
100
- content = ''
101
- @sequence.potential_cleaved_nps.each_with_index do |e, i|
102
- content += ">seq#{i}\n#{e[:np]}\n"
99
+ def write_potential_peptides_to_tempfile(sequence, tempfile)
100
+ return false if sequence.potential_cleaved_nps.empty?
101
+ sequences = ''
102
+ sequence.potential_cleaved_nps.each_with_index do |e, i|
103
+ sequences += ">seq#{i}\n#{e[:np]}\n"
103
104
  end
104
- tempfile.write(content)
105
+ tempfile.write(sequences)
105
106
  tempfile.close
107
+ true
106
108
  end
107
109
  end
108
110
  end
@@ -1,23 +1,26 @@
1
+ # Top level module / namespace.
1
2
  module NpSearch
3
+ # Adapted from GeneValidator's Query Class.
2
4
  # A class to hold sequence data
3
5
  class Sequence
4
- DI_NP_CLV = 'KR|KK|RR'
5
- MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'
6
+ DI_NP_CLV = 'KR|KK|RR'.freeze
7
+ MONO_NP_CLV = '[KRH]..R|[KRH]....R|[KRH]......R'.freeze
6
8
 
7
9
  attr_reader :id
10
+ attr_reader :defline
8
11
  attr_reader :signalp
9
12
  attr_reader :seq
10
13
  attr_reader :html_seq
11
- attr_reader :signalp_output
12
14
  attr_reader :translated_frame
13
15
  attr_accessor :score
14
16
  attr_accessor :potential_cleaved_nps
15
17
 
16
- def initialize(id, seq, signalp_output, frame = nil)
17
- @id = id
18
- sp_cleavage_site_idx = signalp_output[:ymax_pos].to_i - 1
19
- @signalp = seq[0..(sp_cleavage_site_idx - 1)]
20
- @seq = seq[sp_cleavage_site_idx..-1]
18
+ def initialize(entry, sp, frame = nil)
19
+ @id = entry.entry_id
20
+ @defline = entry.definition
21
+ sp_cleavage_site_idx = sp[:ymax_pos].to_i - 1
22
+ @signalp = sp[:orf][0..(sp_cleavage_site_idx - 1)]
23
+ @seq = sp[:orf][sp_cleavage_site_idx..-1]
21
24
  @html_seq = format_seq_for_html
22
25
  @translated_frame = frame
23
26
  @score = 0
@@ -27,7 +30,7 @@ module NpSearch
27
30
  def format_seq_for_html
28
31
  seq = @seq.gsub(/C/, '<span class=cysteine>C</span>')
29
32
  seq.gsub!(/#{DI_NP_CLV}/i, '<span class=np_clv>\0</span>')
30
- seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target R >>
33
+ seq.gsub!(/#{MONO_NP_CLV}/i, '\0::NP_CLV::') # so that we can target 'R'
31
34
  seq.gsub!('R::NP_CLV::', '<span class=mono_np_clv>R</span>')
32
35
  seq.gsub!('G<span class=np_clv>',
33
36
  '<span class=glycine>G</span><span class=np_clv>')
@@ -1,6 +1,6 @@
1
1
  require 'forwardable'
2
- require 'tempfile'
3
2
 
3
+ # Top level module / namespace.
4
4
  module NpSearch
5
5
  # A class to hold sequence data
6
6
  class Signalp
@@ -10,15 +10,34 @@ module NpSearch
10
10
 
11
11
  def analyse_sequence(seq)
12
12
  sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
13
- sp dmaxcut networks)
14
- f = Tempfile.new('signalp')
15
- f.write(">seq\n#{seq}")
16
- f.close
17
- s = `#{opt[:signalp_path]} -t euk -f short -U 0.3 -u 0.3 '#{f.path}' | \
18
- sed -n '3 p'`
19
- Hash[sp_headers.map(&:to_sym).zip(s.split)]
20
- ensure
21
- f.unlink
13
+ sp dmaxcut networks orf)
14
+ data = setup_analysis(seq)
15
+ orf_results = []
16
+ s = `echo "#{data[:fasta]}\n" | #{opt[:signalp_path]} -t euk \
17
+ -f short -U 0.34 -u 0.34`
18
+ sp_results = s.split("\n").delete_if { |l| l[0] == '#' }
19
+ sp_results.each_with_index do |line, idx|
20
+ line = line + ' ' + data[:seq][idx].to_s
21
+ orf_results << Hash[sp_headers.map(&:to_sym).zip(line.split)]
22
+ end
23
+ orf_results.sort_by { |h| h[:d] }.reverse[0]
24
+ end
25
+
26
+ def setup_analysis(seq)
27
+ if opt[:type] == :protein
28
+ data = { seq: [seq], fasta: ">seq\n#{seq}" }
29
+ else
30
+ orfs = seq.scan(/(?=(M\w+))./).flatten
31
+ orfs.unshift(seq)
32
+ data = { seq: orfs, fasta: create_orf_fasta(orfs) }
33
+ end
34
+ data
35
+ end
36
+
37
+ def create_orf_fasta(m_orf)
38
+ fasta = ''
39
+ m_orf.each_with_index { |seq, idx| fasta << ">#{idx}\n#{seq}\n" }
40
+ fasta
22
41
  end
23
42
  end
24
43
  end
@@ -1,4 +1,4 @@
1
1
  # Top level module / namespace.
2
2
  module NpSearch
3
- VERSION = '2.0.1'
3
+ VERSION = '2.1.0'.freeze
4
4
  end
data/lib/npsearch.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'bio'
2
2
  require 'fileutils'
3
3
 
4
- # require 'npsearch/arg_validator'
4
+ require 'npsearch/arg_validator'
5
5
  require 'npsearch/output'
6
6
  require 'npsearch/pool'
7
7
  require 'npsearch/scoresequence'
@@ -11,33 +11,39 @@ require 'npsearch/signalp'
11
11
  # Top level module / namespace.
12
12
  module NpSearch
13
13
  class <<self
14
- MIN_ORF_SIZE = 30 # amino acids (including potential signal peptide)
15
-
16
14
  attr_accessor :opt
17
15
  attr_accessor :sequences
18
16
  attr_reader :sorted_sequences
19
17
 
20
18
  def init(opt)
21
- # @opt = args_validation(opt)
22
- @opt = opt
23
- @sequences = []
19
+ @opt = ArgumentsValidators.run(opt)
20
+ @sequences = []
24
21
  @sorted_sequences = nil
25
- @opt[:type] = guess_sequence_type
26
- @pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
22
+ @pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
23
+ FileUtils.mkdir_p(@opt[:temp_dir])
24
+ extract_orf if @opt[:type] == :genetic
27
25
  end
28
26
 
29
27
  def run
30
- iterate_input_file
28
+ input_file = @opt[:type] == :protein ? @opt[:input_file] : @opt[:orf]
29
+ iterate_input_file(input_file)
31
30
  @sorted_sequences = @sequences.sort_by(&:score).reverse
32
31
  Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
33
32
  Output.to_html(@opt[:input_file])
33
+ remove_temp_dir
34
34
  end
35
35
 
36
36
  private
37
37
 
38
- def iterate_input_file
39
- biofastafile = Bio::FlatFile.open(Bio::FastaFormat, @opt[:input_file])
40
- biofastafile.each_entry do |entry|
38
+ # Uses getorf from the EMBOSS package to extract all ORFs
39
+ def extract_orf(input = @opt[:input_file], minsize = 90)
40
+ @opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
41
+ system "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
42
+ " -minsize #{minsize} >/dev/null 2>&1"
43
+ end
44
+
45
+ def iterate_input_file(input_file)
46
+ Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
41
47
  if @opt[:num_threads] > 1
42
48
  @pool.schedule(entry) { |e| initialise_seqs(e) }
43
49
  else
@@ -48,49 +54,18 @@ module NpSearch
48
54
  end
49
55
 
50
56
  def initialise_seqs(entry)
51
- if @opt[:type] == :protein
52
- initialise_protein_seq(entry.entry_id, entry.aaseq)
53
- else
54
- initialise_transcriptomic_seq(entry.entry_id, entry.naseq)
55
- end
56
- end
57
-
58
- def initialise_protein_seq(id, seq)
59
- sp = Signalp.analyse_sequence(seq)
60
- return unless sp[:sp] == 'Y'
61
- seq = Sequence.new(id, seq, sp)
62
- ScoreSequence.run(seq)
57
+ return if entry.aaseq.length > @opt[:max_seq_length]
58
+ sp = Signalp.analyse_sequence(entry.aaseq)
59
+ return if sp[:sp] == 'N'
60
+ # seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
61
+ seq = Sequence.new(entry, sp)
62
+ ScoreSequence.run(seq, @opt)
63
63
  @sequences << seq
64
64
  end
65
65
 
66
- def initialise_transcriptomic_seq(id, naseq)
67
- (1..6).each do |f|
68
- translated_seq = naseq.translate(f)
69
- orfs = translated_seq.to_s.scan(/(?=(M\w{#{MIN_ORF_SIZE},}))./).flatten
70
- initialise_orfs(id, orfs, f)
71
- end
72
- end
73
-
74
- def initialise_orfs(id, orfs, frame)
75
- orfs.each do |orf|
76
- sp = Signalp.analyse_sequence(orf)
77
- next if sp[:sp] == 'N'
78
- seq = Sequence.new(id, orf, sp, frame)
79
- ScoreSequence.run(seq)
80
- @sequences << seq
81
- # The remaining ORF in this frame are simply shorter versions of the
82
- # same orf so break loop once signal peptide is found.
83
- break if sp[:sp] == 'Y'
84
- end
85
- end
86
-
87
- def guess_sequence_type
88
- fasta_content = IO.binread(@opt[:input_file])
89
- # removing non-letter and ambiguous characters
90
- cleaned_sequence = fasta_content.gsub(/[^A-Z]|[NX]/i, '')
91
- return nil if cleaned_sequence.length < 10 # conservative
92
- type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
93
- (type == Bio::Sequence::NA) ? :nucleotide : :protein
66
+ def remove_temp_dir
67
+ return unless File.directory?(@opt[:temp_dir])
68
+ FileUtils.rm_rf(@opt[:temp_dir])
94
69
  end
95
70
  end
96
71
  end
data/npsearch.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
16
16
  ' For more information: https://github.com/wurmlab/npsearch'
17
17
  s.summary = 'Search for neuropeptides based on the common' \
18
18
  ' neuropeptides markers'
19
- s.homepage = 'https://github.com/IsmailM/NeuroPeptideSearch'
19
+ s.homepage = 'https://github.com/wurmlab/npsearch'
20
20
  s.license = 'AGPL'
21
21
 
22
22
  s.files = `git ls-files -z`.split("\x0")
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
28
28
  s.add_development_dependency 'bundler', '~> 1.6'
29
29
  s.add_development_dependency 'rake', '~>10.3'
30
30
  s.add_development_dependency 'coveralls'
31
+ s.add_development_dependency 'minitest', '~> 5.4'
31
32
 
32
33
  s.add_dependency 'bio', '~> 1.4'
33
34
  s.add_dependency 'slim', '~> 3.0'
@@ -29,9 +29,9 @@ html lang="en"
29
29
  p.sequence
30
30
  span.id
31
31
  - if @opt[:type] == :protein
32
- | >#{seq.id}
32
+ | >#{seq.defline}
33
33
  - elsif @opt[:type] == :nucleotide
34
- | >#{seq.id}-(frame:#{seq.translated_frame})
34
+ | >#{seq.defline}-(frame:#{seq.translated_frame})
35
35
  br
36
36
  span.seq== seq.html_seq
37
37
  br
@@ -50,5 +50,5 @@ html lang="en"
50
50
  a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
51
51
  br
52
52
  | This page was created by
53
- a href="https://github.com/wurmlab/NpSearch" target="_blank" NpSearch
53
+ a href="https://github.com/wurmlab/npsearch" target="_blank" NpSearch
54
54
  | v#{NpSearch::VERSION}
@@ -0,0 +1,167 @@
1
+ >isotig00006 gene=isogroup00003 length=1747 numContigs=6
2
+ AGTTAAAAGTTGAAAAATTGGTGACCATATTTTGACACTCTAGCATATTTGGGAGCTATA
3
+ TACTGATTTGGGTTTCACCATGCACAGATGAGGTATATACATAAGTTGAAAGCCTGCAGC
4
+ TCTATATTAAAGGCATTGAAGACtcGCCcAAaccgtgTGcgcccTCTGAAAAaGTTAACT
5
+ TTCcGTTgCTTGCAaGTGAAGTTTtcTtCTTGTCGCTACAAAATGCAGACAGTAATGAAA
6
+
7
+ >isotig00007 gene=isogroup00003 length=1749 numContigs=5
8
+ TGTGTGTGTGTGGTGCTTCCccTCTAGGGCTGTAAATTTCAAAGGAACCTTGCGCAAGAA
9
+ CAGtAGCTTGCGaCGTTTTTCAAaaCCAGAGGTTCTGAACTGAACTGTACTGACTACTGT
10
+ AGGGtacTTAAaGGCATTGAAGACTCGCCcAAaCCatgTGCCGCGctttGAAAAAGTTAA
11
+ CTTTCCGTTGCTTGCAAATGAcGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgA
12
+ AACGTGATACcTtGTtATCTTTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTG
13
+ TGTTGTGGgTATTGACcGTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGAC
14
+ GGTAGCAAGCTGTGTGTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACA
15
+ CCtCAtTcGAAACTAGGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACA
16
+ cccttttaaggagaagtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGG
17
+ TAGCATGCAACTTAAAAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATT
18
+ ACAAGGttAAtCtacTGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCT
19
+ TtCAaTAaTTATACAAACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTC
20
+ AAGTTTATCAATGTAATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCT
21
+ GTGGATAAGACTGCCAGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCC
22
+ TTCACCTCCTTGCAGATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATT
23
+ AAAATCTGGCTTCCTcCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGT
24
+ GTGAAACCACTGAAAGATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGA
25
+ CAGAGGCCACACTGATACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATT
26
+ TTGCTTCTGCGATGCAGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAa
27
+ CTGCCtGGTTtAtAGAGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTG
28
+ TCATGGCCTTGAGCAAGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTA
29
+ GGTACTGTcAAATCCACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAA
30
+ AGAGTACAATGAGGGTTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGC
31
+ AGCTGaaaaGATATTCAGAAaTTGTTATATATGAGTGTGTTTGTATGCATGCAtATGtGT
32
+ GATTTtCTtGCTTTACAGAACAGCTCCaTTTTGATAAGCTAtgTAAcgtGgAAACCTGCC
33
+ AATCAaTGTTtgAAataGGAcaGgCTGAAACGATTCTTAAATGAAAAGCTTAAtgaCTTc
34
+ TTgCAtttttaTACATCACTGTTCAGGtAaGGCCAGTAAGGgCAGTATgAaGAAtAaGTA
35
+ ACAATtAATAATTATCATTATGGCCATTTGCTGtcTGCATAAtAaCAAACTGAATGATGT
36
+ CATCAGCCCTgTGCTCAGTTGACAgAACTGACAAGTAGGCACACaaTGTCAGTGTGATCC
37
+ ATGAAACCT
38
+ >isotig00008 gene=isogroup00003 length=1726 numContigs=6
39
+ AGGTTTCATGGATCACACTGACAtTGTGTGCCTACTTGTCAGTTcTGTCAACTGAGCAcA
40
+ GGGCTGATGACATCATTCAGTTTGttattATGCAggaCAGCAAATGGCCATAATGATAAT
41
+ TATTAaTTGTTACTtaTTCTtcATACTGCCcTTACTGGCCTtaCCTGAACAGTGATGTAt
42
+ caaaaTGcAAgAAGtcaTTAAGCTTTTCATTTAAGAATCGTTTCAGCctgTCCtaatTTt
43
+ cAAaCAtTGATTGGCAGGTTTCcacgTTAcaTAGCTTATCAAAAtGGAGCTGTTCTGTAA
44
+ AGCAAGaAAATCACaCATaTGCATGCATACAAACACACTCATATATAACAAtTTCTGAAT
45
+ ATCTtttCAGCTGCCCAAAGGTACTGGAATGCAAGCTCTGTTGGGTGAATTAAAAAaCCc
46
+ TCATTGTACTCTTTTATCATGGTCAGCGTAGCTGGAACCAGCAATGATGGTGATGTGGAT
47
+ TTGACAGTACCTAGACAAGAATTCAATAGCTGCTTTGTGTTGAAGTGGGTTCAACTTGCT
48
+ CAAGGCCATGACAGGATGACGACTAGACATtAGGATGAATTATCTGTTGCAGAGCTCTAT
49
+ AAaCCAGGCAGTtGTtAAAaCATCCATTGGGTGACCcTCACAGTCTACCAGGCACTGCAT
50
+ CGCAGAAGCAAAATGAAGTGTGTGTgATACCTTCATCTTATGAGAGTGGAATGGTATCAG
51
+ TGTGGCCTCTGTCAATTTGGCTTCAAGTCTTTGTTTTCTTGAAAGCTGAGAaGATCTTTC
52
+ AGTGGTTTCACACTGACCTCTGTACTTGACAATTCATGTGTATTTGCCAGCTCAGgAGGA
53
+ AGCCAGATTTTAATTACATTAACCAATGCTGACTTTTTTttGGACATGTGGTACATCTGC
54
+ AAGGAGGTGAAGGAAGCAGTTGGAGTTGCATGGATGTGGAATCTTGTTGATAGTCTGGCA
55
+ GTCTTATCCACAGATTATCCCAAAGCTTCTCCACATTGCCACATTCAGAACTTATTACAT
56
+ TGATAAACTTGAAAATtGCAGGAATCTAaCcAaGCACCcATCAaGGGAaTTTGTTTGTAT
57
+ AATtATtGAAaGCTGTGACcTTCTGATGTGACAGACTAATGTGAAaTAAAGGgCAgtaGa
58
+ TTaCCTTGTaaTGAACCttGTTATTGTTTGATTGTATCTAAtGTTTGCAaaTTTTAAGTT
59
+ GCATGCTACCAATTGAAACATAATTCTTTCTCTAttaatgggatataaaatacttctcct
60
+ taaaagggTGTgAaGACTcggCACAAAGAAACGTCtaTGCcGgtAaTCTGACCTAGTTTc
61
+ gAatGaGGTGTAACagAAGTgTtAGACACcACCAttGATCCcAGAAAATACACACACAGC
62
+ TTGCTACCGTCGGTAaTTAGACACTAGTGTACAGTCAaTACATACAGCTAcGgTCAATAC
63
+ CCACAaCACAgTGTAcATAGCAGCGaTGgACATCTCAGGTCTAGATAAAAGATAaCAAGG
64
+ TATCACGTTTCATtaCTGTCTGCATTTtGTAGCgaCAagAAGAAAAcgtCATTtGCAAGC
65
+ AaTGgAAAGTtAACTTTTTCaGAGCGcagCAcGCgggTTGGGGCAAGTCTTCCAAGCCTT
66
+ TAAGTtGACAtcTTGCCTTTGGCTATCCAGGgTGACAAGATGATACTAGCAGGTAgagtg
67
+ actaattgagccctgtgtgagaaaccaatgcagaatctagcctagt
68
+ >isotig00009 gene=isogroup00003 length=1827 numContigs=2
69
+ TAGCTGTGATCTAGTGGATCTGACTGGCCTTTTGATTATTTCAGCacGATTCTCAGACTA
70
+ CAGTTGTAAaCCTACTTCGACTACTACTACTActagtacTAACGGTGCAACGTTGTTATA
71
+ AGTTTGCCAAAGGTGAAACTTTAGCCTTAGGACtGTGTTTATTTTATTTGCAGTCGCATT
72
+ CgCCTAACTGTTTTCTGTTACTGGGTGCATTTAACTCACATTAATAGAGGATTTTtGACT
73
+ AGTtCcTAGAGAGTGGTGTTTCTGTTTTACCACCATGGCAAAAAAGGGAAaGCCTCGCCC
74
+ TGACCATAGGCCTCCTGCACACAACCCGCATTATGCTCATGATCCACCACCTTATTCACA
75
+ ACAGCAACCACCACTTCAACAGCAGAACTATGCACAACAAATGCATCATGGTGGAGGTGG
76
+ TGGAAATAGACAACATGCACGACcTAGACCTAGTCCACCTTCAGAAGTCAGTGACTGTGT
77
+ CAAGTACTCCCTTTTCTtGTATAACTGCATCTTTTGgaTTGtCGGCCTTttCTTTATtGC
78
+ AGCAGGTATCTGGgCATTTCACGATAGGGGTGTTTTTAATGAATTCCAGTCACTTAGTAC
79
+ CAATGAGGTCTCCTTTCTCACTGATCCTGTTATTTGGCTGTTCGTCCTCGGAGGTGTAGT
80
+ TTTCATGCTGGGAACCCTCGGATGTCTgGGGgCCCTCAGAGAAAaTATCTGCATGCTGAA
81
+ GTGTTTTAGCATAATCATGGGGCTTATACTGCTGCTGGAAATTGGAGGTGGATGTGCGAT
82
+ ATACTTCTATCGTGCACAGATTCAGGCACAGTTTCAAAAGTCCTTAACAGATGTGaCCAT
83
+ AACAGATTACAGAGAAAATGCTGATTTCCAGGATCTCATAGACGCATTACAATCCGGTCT
84
+ TTCTTGTTGTGGTGTCAATTCCTatGAAGACTGGGATAATAATATTTATTTCAACTGTAG
85
+ TGGTCCTGCCAATAACCCTGAAGCcttGTGGTGTGCCTTtCTCCTGTTGTATACCGGATC
86
+ AAGCAAGCGGAGTAGCCAACACCCAGTGCGGTTATGGAGTTCGTTCCCCCGAACAACAAA
87
+ ATACTTTCCACACAAAGATTTACACCACTGGCTGTGCGGATATGTTTACAATGTGGATTA
88
+ ATAGGTACCTATATTACATAGCAGGCATTGCTGGGGTCATTGTCTTGGTCGAGTtGTTTG
89
+ GATTCTGTTTTGCACATTCCCTCATCAACGACATCAAACGCCAAAAGGCCCGCTGGGCGC
90
+ ATCGATAATTCATTCCAGGATGTTGGTGgATGATGCTACTCAAGGGagAAGACTGACAGT
91
+ GCCTTTtGGTCAaTATCGTGTAGCATCAGGAAGGAGGTAGTACCTCCTCAACTAACCaTA
92
+ ACAGAATTTGTCCAGTTTGTAACATCGTCAAGAAATAAACAGACTTTTTTTACCATTAGG
93
+ ACgTGATAATACTACCACGTAACCTCTCAAAGCACAAAAAGCAAAAAGCAAATATCTCCT
94
+ TGTTTTAAAATTAGaagGTCTATCTCAGATAACAACCACAGAACATgTGGAGTTTTCCtT
95
+ TATGCTATCATAAAGATATAAATATATATAAAATTGAGGTAGcATCtTGGCTACCCACCA
96
+ AAATCATTTTTTTTCCAGTTTGaAACATCATGGAACATTTCAGAACAAAGATCATTTCAG
97
+ TCGTTACCACACTCAAGAgaTTGCTGTcGTCAaCaTTTtGtaGCTTTTtAAtGTCTTGAT
98
+ CTTCGTCGACATCGTCAATGTGTAAACTATTCTCGACGAGAGATTAGTGTCTAATACTGC
99
+ GGGTgATTTGATATAAATCTCACTTGG
100
+ >isotig00010 gene=isogroup00003 length=1650 numContigs=5
101
+ TGAATGAGAAAtGAAATTTAGCGAAGAAATCACCTTGTAAATTAAAAACTAAAATGGCTT
102
+ TCACACAAATTAaCAGTAAAtGgAGAATGTTTTTAAAGCAATATATGCAGTACAGCcATT
103
+ CATTGGAAAACAGTAAcAAAaTACATTTATCTTGTtcATTTTtACctCctGCAAaacTTA
104
+ cAaCcGTTAATTATGTAGATTGGATGGCACTAACAGGGTACTTGTCTTATCTGCCTATTG
105
+ GATAATGTGGcATTAATACTACTGTGTATGGGCACTGAGGCTGAGAGTGCAGTAAGTTtA
106
+ AAGGCATTGAAGACTCtCCCCGAaCcGCGtGCCGGGCTctGAAAAAGTtAaCTGCTCGCA
107
+ AaTtAcGTTTtCTtCTTGTCaCTaCAAAaTGCAGACATTaaTGAAACGTGATACCTTGTt
108
+ ATCTTTTATCTAGACCTGAGATGTCcAtCGCTGCTATgTACAcTGTGTTGTGGGTATTGA
109
+ CcgTAGCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGT
110
+ GTGTATTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTA
111
+ GGTCAGAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaa
112
+ gtattttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAA
113
+ AAtTTTGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtac
114
+ TGCCCTTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACA
115
+ AACAAATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTA
116
+ ATAAGTTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCC
117
+ AGACTATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAG
118
+ ATGTACCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCT
119
+ cCTGAGCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAA
120
+ GATCTTCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGA
121
+ TACCATTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGC
122
+ AGTGCCTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAG
123
+ AGCTCTGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCA
124
+ AGTTGAACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCC
125
+ ACATCACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGG
126
+ TTTTTTAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTT
127
+ TGTTCCTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAG
128
+ CACAGTGTGCAAAGctGCTATATATTGTCC
129
+ >isotig00011 gene=isogroup00003 length=1525 numContigs=6
130
+ ACATTCTTCAAGAGCTCTGCACCCACCAATCTAAAGTGACCAGCCAAGTGACTGACCTCA
131
+ GGGCACAGTTAGCAGCTTTGACCACAGGATGAGCTATGTAACAACTGAAtgaaTGGTGTT
132
+ CAtcGTTGATTGGGCAgTCAAAACAGCTGAATTTCTCTTGCGgAAGACATAAAGGCATTG
133
+ AAGACtcGCCcAAaccGtGTGcgcccTCTGAAAAaGTTAACTTTctGTTgCTTGCAaGTG
134
+ AAGTTTtcTtCTtGTCgCTACAAAATGCAGACAGTAaTgAAACGTGATACcTtGTtATCT
135
+ TTtATCTAgACctGAGATGtCcACGCTGCTATGTACACTGTGTTGTGGgTATTGACcGTA
136
+ GCTGTATGTATtGACTGTACACTAGTGTCTAATtACCGACGGTAGCAAGCTGTGTGTGTA
137
+ TTTTCTGGGATCaaTGGTGgTGTCTAACACTTCtGTTACACCtCAtTcGAAACTAGGTCA
138
+ GAtTAcCgGCATAGACGTTTCTTTGTGCcgAGTCtTCACAcccttttaaggagaagtatt
139
+ ttatatcccattaaTAGAGAAAGAATTATGTTTCAATTGGTAGCATGCAACTTAAAAtTT
140
+ TGCAAACaTTAGATACAATCAAACAATAACAAGGTTCATTACAAGGttAAtCtacTGCCC
141
+ TTTATTtCACATTaGTCTGTCACATCAGAAGgTCACAGCTTtCAaTAaTTATACAAACAA
142
+ ATtCCCTtGATGGgTGCTtGgTtAGATTCCTGCaatTTTCAAGTTTATCAATGTAATAAG
143
+ TTCTGAATGTGGCAATGTGGaaGAAGCtTtGGGATAATCTGTGGATAAGACTGCCAGACT
144
+ ATCAACAAGATTCCACATCCATGCAACTCCAACTGCTTCCTTCACCTCCTTGCAGATGTA
145
+ CCACATGTCCaaAAAAAAGTCAGCATTGGTTAATGTAATTAAAATCTGGCTTCCTcCTGA
146
+ GCTGGCAAATACACATGAATTGTCAAGTACAGAGGTCAGTGTGAAACCACTGAAAGATCT
147
+ TCTCAGCTTTCAAGAAAACAAAGACTTGAAGCCAAATTGACAGAGGCCACACTGATACCA
148
+ TTCCACTCTCATAAGATGAAGGTATCACACACACTTCATTTTGCTTCTGCGATGCAGTGC
149
+ CTGGTAGACTGTGAGGgTCACCCAATGGATgtTTTAaCAaCTGCCtGGTTtAtAGAGCTC
150
+ TGCAACAGATAATTCATCCTAaTGTCTAGTCGTCATCCTGTCATGGCCTTGAGCAAGTTG
151
+ AACCCACTTCAACACAAAGCAGCTATTGAATTCTTGTCTAGGTACTGTcAAATCCACATC
152
+ ACCATCATTGCttGGTTCCAGCTaCGcTGACCATGaTAAAAGAGTACAATGAGGGTTTTT
153
+ TAATTCACCCAACAGAGCTTGCATTCCAGTACCTTTGGGCAGCTGATATCCATTTTGTTC
154
+ CTCGTATgCCTGTCAAAATCTGACATTctGagTCGCTTCGTTTGTTCGCAACGAGCACAG
155
+ TGTGCAAAGctGCTATATATTGTCC
156
+ >isotig00001_f6
157
+ FRIYKNYALQYVSKDAHLSLLWSAVTHLGTQYFGKTPFVLTYILLTECAVESCMEACIETLINNYEKDKLLPLQYYTSYFLCRHLAKCDYSKTDKILNVCQRNLMAFLLKINPFHLLRHRLAPNRMQPYEKVNFLLLASDVLFVSLQNADSNETRYIVIFSWTDVHRCTVCTLCCGCLQLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITAPFFLRESSHRIMIIGFSNLEPLTKCVTELIFLVTKKTLVHMRERYINMSIRGRVVYLLPDSADNSQLFENTLCYISSSFKAPKPNQQNKKPNEQRNCMTNVNFSKIELLKHISQHISHRANRTELELTVHCTFHFFLYLKVTTKLHRKTKTSSLHFNHKNIFALITKKIRYTIRQTLPYLKPLRLCSTLSVLNCLYSTVLTRQISIIKISKIYYIHNLKDVECLSGVKAGAFQSGPLFESLQDCMSSSYRVVENRQFIIMDVKYETKRLTSVSLRLANEASSRVPACRMTNTYIYITVMRNAYLPKRCSYTRKGSTHSHLLKVDVVVHVVYFHHLHHDAFVVHSSAVEVVVAVVNKVVDHEHNAGCVQEAYGQGEAFPFLPWWNRNTTLELVKNPLLMVKCTQQKTVRRMRLQIKTQSGSFTFGKLITTLHRYSKVYNCSLRIVLKSKGQSDPLDHS
158
+ >isotig00002_f1
159
+ MRNEIRRNHLVNKLKWLSHKLTVNGECFSNICSTAIHWKTVTKYIYLVHFYLLQNLQPLIMIGWHQGTCLICLLDNVALILLCMGTEAESAVSLKALKTLPEPRAGLKSLLANYVFFLSLQNADINETYLVIFYLDLRCPSLLCTLCCGYPLYVLTVHCLITDGSKLCVYFLGSMVVSNTSVTPHSKLGQITGIDVSLCRVFTPFGEVFYIPLIEKELCFNWHATNFANIRYNQTITRFITRLIYCPLFHISLSHQKVTAFNNYTNKFPWVLGIPAIFKFINVISSECGNVEEALGSVDKTARLSTRFHIHATPTASFTSLQMYHMSKKKSALVNVIKIWLPPELANTHELSSTEVSVKPLKDLLSFQENKDLKPNQRPHYHSTLIRRYHTHFILLLRCSAWTVRVTQWMFQLPGLSSATDNSSCLVVILSWPASTHFNTKQLLNSCLGTVKSTSPSLLGSSYADHDKRVQGFFNSPNRACIPVPLGSKDIQKLLYMSVFVCMHMCDFLALQNSSILISYVTWKPANQCLKDRLKRFLNEKLNDFLHFYTSLFRGQGQYEEVTINNYHYGHLLSAQTECHQPCAQLTELTSRHTMSVSMKP
160
+ >isotig00008_f3
161
+ VSWITLTLCAYLSVLSTEHRADDIIQFVIMQDSKWPLLIVTYSSYCPYWPYLNSDVSKCKKSLSFSFKNRFSLSFSNIDWQVSTLHSLSKWSCSVKQENHTYACIQTHSYITISEYLFSCPKVLECKLCWVNKTLIVLFYHGQRSWNQQWCGFDSTTRIQLLCVEVGSTCSRPQDDDTLGIICCRALTRQLLKHPLGDPHSLPGTASQKQNEVCVIPSSYESGMVSVWPLSIWLQVFVFLKAEKIFQWFHTDLCTQFMCICQLRRKPDFNYINQCLFFGHVVHLQGGEGSSWSCMDVESCSGSLIHRLSQSFSTLPHSELITLINLKIAGIPSTHQGNLFVLLKAVTFCDRLMNKGQITLTLLLFDCICLQILSCMLPIETFFLYWDIKYFSLKGCEDSAQRNVYAGNLTFRMRCNRSVRHHHSQKIHTQLATVGNTLVYSQYIQLRSIPTTQCTQRWTSQVIKDNKVSRFITVCILRQEENVICKQWKVNFFRAQHAGWGKSSKPLSHLAFGYPGQDDTSRSDLSPVETNAESSL
162
+ >isotig00010_f3
163
+ NEKNLAKKSPCKLKTKMAFTQINSKWRMFLKQYMQYSHSLENSNKIHLSCSFLPPAKLTTVNYVDWMALTGYLSYLPIGCGINTTVYGHGECSKFKGIEDSPRTACRALKKLTARKLRFLLVTTKCRHNVIPCYLLSRPEMSIAAMYTVLWVLTVAVCIDCTLVSNYRRQAVCVFSGINGGVHFCYTSFETRSDYRHRRFFVPSLHTLLRRSILYPINRERIMFQLVACNLKFCKHIQSNNNKVHYKVNLLPFISHSVTSEGHSFQLYKQIPLMGAWLDSCNFQVYQCNKFMWQCGRSFGIICGDCQTINKIPHPCNSNCFLHLLADVPHVQKKVSIGCNNLASSAGKYTIVKYRGQCETTERSSQLSRKQRLEAKLTEATLIPFHSHKMKVSHTLHFASAMQCLVDCEGHPMDVLTTAWFIELCNRFILMSSRHPVMALSKLNPLQHKAAIEFLSRYCQIHITIIAWFQLRPKSTMRVFFTQQSLHSSTFGQLISILFLVCLSKSDILSRFVCSQRAQCAKLLYIV
164
+ >isotig00012_f3
165
+ LKVEKLVTIFHSSIFGSYILIWVSPCTDEVYTVESLQLYIKGIEDSPKPCAPSEKVNFPLLASEVFFLSLQNADSNETYLVIFYLDLRCPRCYVHCVVGIDRSCMYLYTSVLPTVASCVCIFWDQWWCLTLLLHLIRNVRLPATFLCAESSHPFKEKYFISHRKNYVSIGSMQLKILQTLDTIKQQGSLQGSTALYFTLVCHIRRSQLSIIIQTNSLDGCLVRFLQFSSLSMVLNVAMWKKLWDNLWIRLPDYQQDSTSMQLQLLPSPPCRCTTCPKKSQHWLMLKSGFLLSWQIHMNCQVQRSVNHKIFSAFKKTKTSQIDRGHTDTIPLSDEGITHTSFCFCDAVPGRLGSPNGCFNNCLVYRALQQIIHPNVSSSCHGLEQVEPTSTQSSYILVVLSNPHHHHCLVPATLTMIKEYNEGFLIHPTELAFQYLWAADIHFVPRMPVKIHSESLRLFATSTVCKAAIYC
166
+ >isotig00015_f4
167
+ RRPYVARVKVINQWLSLELFLQIKVSTKKITVFEGGHRYERSTGQTRTILTAFHPISRTRIEPQTILHCGRTGHSSWDRGVEHTFTTMLPLYNTSFVSSMLPWCMEFCNQQKSLSSVSIATSIRAASMELQGVNVTFYCSVVRNHVCQHGVALKGCEDSRKKNVCRSDLVSNEVQKCTLPSIPENTNTACYRRLDTSVQSIHTATVNTHNTVYIAAMDISGLDKRQGITFHCLHFVVTRRKRNLRAVNFFRARHAVRGESSMPLNLLHSQPQCPYTVVLMPHYPIGRDKYPVSAIQSTLTVVSFAGGKNEQDKCILLLFSNEWLYCIYCFKNILHLLLICVKAILVFNLQGDFFAKFHFSF
@@ -0,0 +1,50 @@
1
+ require_relative 'test_helper'
2
+
3
+ require 'npsearch/arg_validator'
4
+
5
+ # Class to test how well the CLI arguments are validated.
6
+ class TestInputArgumentValidator < Minitest::Test
7
+ def setup
8
+ @c = NpSearch::ArgumentsValidators
9
+ @opt = { num_threads: 1, min_orf_length: 30 }
10
+ end
11
+
12
+ def test_assert_file_present
13
+ @c.send(:assert_file_present, 'existing file',
14
+ 'test/files/genetic.fa', 1)
15
+ assert_raises(SystemExit) do
16
+ @c.send(:assert_file_present, 'non-existing file',
17
+ 'test/files/nope_dont_exist.fa', 1)
18
+ end
19
+ end
20
+
21
+ def test_assert_input_file_not_empty
22
+ f = 'test/files/genetic.fa'
23
+ @c.send(:assert_input_file_not_empty, f)
24
+ f = 'test/files/empty_file.fa'
25
+ assert_raises(SystemExit) { @c.send(:assert_input_file_not_empty, f) }
26
+ end
27
+
28
+ def test_assert_input_file_probably_fasta
29
+ f = 'test/files/genetic.fa'
30
+ @c.send(:assert_input_file_probably_fasta, f)
31
+ f = 'test/files/not_fasta.fa'
32
+ assert_raises(SystemExit) { @c.send(:assert_input_file_probably_fasta, f) }
33
+ end
34
+
35
+ def test_assert_input_sequence
36
+ f = 'test/files/genetic.fa'
37
+ @c.send(:assert_input_sequence, f)
38
+ f = 'test/files/protein.fa'
39
+ @c.send(:assert_input_sequence, f)
40
+ f = 'test/files/mixed_content.fa'
41
+ assert_raises(SystemExit) { @c.send(:assert_input_sequence, f) }
42
+ end
43
+
44
+ def test_check_num_threads
45
+ [1, 50, 300].each do |t|
46
+ @c.send(:check_num_threads, t)
47
+ end
48
+ assert_equal(1, @c.send(:check_num_threads, -3))
49
+ end
50
+ end
@@ -0,0 +1 @@
1
+ require 'minitest/autorun'