bacterial-annotator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: feff352efae5fee416e2ed704a1ba20e36c391f3
4
+ data.tar.gz: 852b8ce7a8c6fa18bae196434ef47a8687584e32
5
+ SHA512:
6
+ metadata.gz: 794d2ee5ae969f986e0e5a3dbdf65425c6a8b9b39d72df383b8bcb09f1f51ab790355e186c6bf1081bf84f0bd3ee39cdde25e450de5939e14b70114a7a38ac69
7
+ data.tar.gz: 94c90664667c6d913d4396ec1e0bda356bc3fff86d3e1d4f91b91ba6fb55db16d7310e43f3e44bfc9ba8efd76b0b2e87d2f6b28d1fd08b2f222b798771cf2c3a
data/bin/ba_blat ADDED
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ # author: maxime déraspe
4
+ # email: maxime@deraspe.net
5
+ # review:
6
+ # date: 15-02-24
7
+ # version: 0.01
8
+ # licence:
9
+
10
+ require 'net/http'
11
+
12
+ ROOT_path = File.dirname(__FILE__)
13
+ # blat URL = "http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat"
14
+
15
# Install blat on the user system.
#
# Downloads the Linux x86_64 blat binary from the UCSC download server into
# ROOT_path and marks it executable. Aborts the process (exit status 1) if
# anything goes wrong.
#
# NOTE(review): the executable is fetched over plain HTTP without any
# integrity check; consider HTTPS and/or a checksum verification.
def installBlat

  blat_bin = "#{ROOT_path}/blat.linux"

  begin
    Net::HTTP.start("hgdownload.cse.ucsc.edu") do |http|
      resp = http.get("/admin/exe/linux.x86_64/blat/blat")
      # Net::HTTP#get does not raise on 3xx/4xx/5xx: without this check an
      # HTML error page would be saved and made executable.
      unless resp.is_a?(Net::HTTPSuccess)
        raise "unexpected HTTP response #{resp.code} #{resp.message}"
      end
      File.open(blat_bin, "wb") do |file|
        file.write(resp.body)
      end
    end

    File.chmod(0755, blat_bin)
  rescue => e
    # The presence of this file is what marks blat as installed, so never
    # leave a partial download behind.
    File.delete(blat_bin) if File.exist?(blat_bin)
    abort "Problem installing Blat, aborting (#{e.message})"
  end

end
32
+
33
+
34
# Install blat if it is not already installed.
# The user must accept the blat license first; an empty answer counts as "yes".
unless File.exist? "#{ROOT_path}/blat.linux"

  puts "Installing Blat the aligner.."
  puts ""
  puts "Please note that the Blat source and executables are freely available for"
  puts "academic, nonprofit and personal use. Commercial licensing information is"
  puts "available on the Kent Informatics website (http://www.kentinformatics.com/)."
  puts "See http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/"
  puts ""
  puts "Do you accept the license ? Y/n"

  # gets returns nil on EOF (e.g. no terminal attached); that is treated as a
  # refusal instead of crashing on nil.chomp.
  reply = $stdin.gets
  answer = reply.nil? ? nil : reply.strip.downcase

  if answer == "y" || answer == ""
    puts "OK you accepted the licence let's install the blat binary"
    installBlat
    puts "Blat successfully installed : OK"
    puts ""
  else
    puts "Sorry bacterial-annotator rely on blat for the alignment !"
  end

end
58
+
59
+
60
+ # system("#{ROOT_path}/blat.linux")
data/bin/ba_prodigal ADDED
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ # author: maxime déraspe
4
+ # email: maxime@deraspe.net
5
+ # review:
6
+ # date: 15-02-24
7
+ # version: 0.01
8
+ # licence:
9
+
10
+ require 'open-uri'
11
+
12
+ ROOT_path = File.dirname(__FILE__)
13
+ # prodigal URL = "https://github.com/hyattpd/Prodigal/releases/download/v2.6.2/prodigal.linux"
14
+
15
# Install prodigal on the user system.
#
# Downloads the Prodigal v2.6.2 Linux binary from its GitHub release into
# ROOT_path and marks it executable. Aborts the process (exit status 1) if
# anything goes wrong.
def installProdigal

  prodigal_bin = "#{ROOT_path}/prodigal.linux"

  begin
    # URI.open (open-uri) replaces Kernel#open on URLs, which no longer works
    # since Ruby 3.0; the block form closes the download handle.
    URI.open("https://github.com/hyattpd/Prodigal/releases/download/v2.6.2/prodigal.linux") do |resp|
      File.open(prodigal_bin, "wb") do |file|
        file.write(resp.read)
      end
    end
    File.chmod(0755, prodigal_bin)
  rescue => e
    # The presence of this file is what marks prodigal as installed, so never
    # leave a partial download behind.
    File.delete(prodigal_bin) if File.exist?(prodigal_bin)
    abort "Problem installing Prodigal, aborting (#{e.message})"
  end

end
29
+
30
+
31
# Install prodigal if it is not already installed
unless File.exist? "#{ROOT_path}/prodigal.linux"

  puts "Installing Prodigal the ORF finder.."
  puts "See https://github.com/hyattpd/Prodigal"
  puts "The Licence is GPLv3"
  installProdigal
  puts "Prodigal successfully installed"
  puts ""

end
42
+
43
+ # Main
44
+ # Dir.mkdir("Prodigal-Output") if ! File.exists? "Prodigal-Output"
45
+ # system("#{ROOT_path}/prodigal.linux -a Prodigal-Output/Proteins.fa -d Prodigal-Output/Genes.fa -o Prodigal-Output/Genbanks.gbk -i #{ARGV[0]}")
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ # author: maxime déraspe
4
+ # email: maxime@deraspe.net
5
+ # review:
6
+ # date: 15-02-24
7
+ # version: 0.01
8
+ # licence:
9
+
10
+
11
+ require 'bacterial-annotator'
12
+
13
+
14
# Print the command-line usage message to standard output.
#
# NOTE(review): column alignment of the help text was reconstructed; the
# --guessref option is flagged because the option parser does not handle it.
def usage

  print <<OEM

bacterial-annotator [OPTIONS]

  [OPTIONS]

    // IO

    --input/-i      <fasta_file>    Provide the fasta file to annotate
    --outdir/-o     <outdir>        Output directory [default=BAnnotation]
    --force/-f                      Force to overwrite the output directory

    // Dataset

    --refgenome/-g  <GBK_ID>        Provide a Genbank file or a Gbk Accession ID.
    --guessref                      Will guess the best reference genome to use for the annotation [not implemented yet].

    --remotedb      <remote_database> [nr|refseq|swissprot]
                                    Complete the annotation of remaining CDS with a remote NCBI BLAST
                                    Can be very slow, better to use an external database !

    --externaldb    <proteins fasta_file>
                                    Complete or do the annotation of remaining CDS with this database (a protein fasta file).
                                    Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
                                      >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
                                      >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..

    // Other options

    --pidentity                     Minimum percentage identity to incorporate a CDS annotation [default=0.7]
    --minlength                     Minimum contig length for annotation [default=500]

    --meta                          Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
    --gff                           Will also generate gff annotation files [off by default]

    --help/-h                       Print this !
OEM

end
56
+
57
# Parse the options given on the CLI.
#
# Consumes ARGV destructively (every argument is shifted off). Option names
# are matched case-insensitively.
#
# Returns a Hash keyed by symbols. Defaults: outdir "BAnnotation",
# pidentity 70, minlength 500, meta 0. Values read from the command line are
# kept as Strings; switches are stored as 1.
#
# Exits after printing the usage for --help/-h, and aborts (exit status 1)
# when an option that requires a value is the last argument.
def parseOptions

  # default options
  options = {
    outdir: "BAnnotation",
    pidentity: 70,
    minlength: 500,
    meta: 0
  }

  # options followed by a value
  valued = {
    "--input" => :input, "-i" => :input,
    "--refgenome" => :refgenome, "-g" => :refgenome,
    "--outdir" => :outdir, "-o" => :outdir,
    "--minlength" => :minlength,
    "--pidentity" => :pidentity,
    "--remotedb" => :remote_db,
    "--externaldb" => :external_db
  }

  # simple switches
  switches = {
    "--force" => :force, "-f" => :force,
    "--gff" => :gff,
    "--meta" => :meta
  }

  while (arg = ARGV.shift)

    name = arg.downcase

    if valued.key?(name)
      value = ARGV.shift
      # Storing nil would still make options.has_key? true and let a run
      # start without the required value.
      abort "Missing value for option #{arg}" if value.nil?
      options[valued[name]] = value
    elsif switches.key?(name)
      options[switches[name]] = 1
    elsif name == "--help" || name == "-h"
      usage
      exit
    else
      warn "Ignoring unknown option: #{arg}"
    end

  end

  options

end
100
+
101
+
102
# Main
# Parses the command line, validates it, makes sure the third-party binaries
# are available, then runs the annotation. With fewer than two arguments the
# usage message is printed instead.
if ARGV.size > 1

  ROOT = File.dirname(__FILE__)
  options = parseOptions

  # Check options: stop here instead of starting an annotation that cannot
  # succeed.
  has_annotation_source = [:refgenome, :remote_db, :external_db].any? { |k| options.has_key? k }
  unless has_annotation_source
    abort "You didn't provide a reference genome or a database for the annotation !"
  end
  unless options.has_key? :input
    abort "You didn't provide a fasta file to annotate !"
  end
  puts ""

  # Check for 3rd party dependencies : Prodigal and Blat
  # (each helper script installs its binary when it is missing)
  system("ba_prodigal")
  system("ba_blat")

  unless File.exist?("#{ROOT}/prodigal.linux")
    abort "exiting prodigal is missing"
  end
  unless File.exist?("#{ROOT}/blat.linux")
    abort "exiting blat is missing"
  end

  bannot = BacterialAnnotator.new(options, ROOT)
  bannot.prepare_files_for_annotation
  bannot.run_annotation

else
  usage
end
@@ -0,0 +1,406 @@
1
+ # -*- coding: utf-8 -*-
2
+ # author: maxime déraspe
3
+ # email: maxime@deraspe.net
4
+ # review:
5
+ # date: 15-02-24
6
+ # version: 0.0.1
7
+ # licence:
8
+
9
+ require 'bio'
10
+ require 'fileutils'
11
+
12
+ require 'bacterial-annotator/genbank-manip'
13
+ require 'bacterial-annotator/fasta-manip'
14
+ require 'bacterial-annotator/synteny-manip'
15
+ require 'bacterial-annotator/remote-ncbi'
16
+
17
+ class BacterialAnnotator
18
+
19
# Initialize BacterialAnnotator
#
# options - Hash of CLI options. Keys read here: :outdir, :minlength,
#           :pidentity, :input, :meta, and optionally :force and :refgenome.
# root    - directory path stored in @root and later handed to the Prodigal
#           and BLAT runners.
#
# Creates the output directory. Aborts if it already exists and :force is
# not set; with :force the existing directory is removed first.
def initialize options, root

  @root = root
  @options = options
  @outdir = @options[:outdir]

  @minlength = @options[:minlength].to_i
  @pidentity = @options[:pidentity].to_f
  # a value below 1 is taken as a fraction and converted to a percentage
  @pidentity = @pidentity * 100 if @pidentity < 1.00

  if File.exist? @outdir
    unless options.has_key? :force
      abort "Output directory already exist ! Choose another one or use -f to overwrite"
    end
    puts "Overwriting output directory #{@outdir}"
    FileUtils.remove_dir(@outdir, true)
  end
  # mkdir_p also creates missing parent directories
  FileUtils.mkdir_p(@outdir)

  @fasta = FastaManip.new(@options[:input], @options[:meta])

  @with_refence_genome = false
  if @options.has_key? :refgenome
    @with_refence_genome = true
    @refgenome = GenbankManip.new(@options[:refgenome], @outdir)
  end

  @prot_synteny = nil
  @annotation_stats = {by_contigs: {},
                       annotated_cds: 0,
                       total_cds: 0,
                       foreign_contigs: [],
                       synteny_contigs: [],
                       short_contigs: []}

  @contig_foreign_cds = {}
  @contig_annotations = {}

end # end of method
61
+
62
# Prepare the input files for the annotation:
# runs Prodigal on the query fasta and, when a reference genome was given,
# writes its CDS to the output directory.
def prepare_files_for_annotation
  puts "\nRunning Prodigal on your genome.."
  @fasta.run_prodigal(@root, @outdir)
  puts "Prodigal done."

  # nothing more to prepare without a reference genome
  return unless @with_refence_genome

  @refgenome.write_cds_to_file(@outdir)
  puts "Successfully loaded #{@refgenome.gbk.definition}"
end # end of method
73
+
74
# Run the whole annotation:
# align the query proteins on the reference genome (when there is one),
# finish the remaining CDS with the external/remote database, write the
# genbank files and print the statistics.
def run_annotation

  foreign_cds_file =
    if !@with_refence_genome

      # no reference genome .. every predicted CDS is left to the database step
      @fasta.prodigal_files[:proteins]

    else

      # process reference genome synteny
      @prot_synteny = SyntenyManip.new(@fasta.prodigal_files[:proteins],
                                       @refgenome.cds_file,
                                       "Prot-Ref",
                                       @pidentity)
      puts "\nRunning BLAT alignment with Reference Genome.."
      @prot_synteny.run_blat(@root, @outdir)
      @prot_synteny.extract_hits(:refgenome)

      @fasta.prodigal_files[:contigs].each_with_index do |contig, idx|

        # contigs under the minimum length are only recorded, not annotated
        if @fasta.prodigal_files[:contigs_length][idx] < @minlength
          @annotation_stats[:short_contigs] << contig
          next
        end

        prot_ids = @fasta.prodigal_files[:prot_ids_by_contig][contig]
        @contig_annotations[contig] = @prot_synteny.get_annotation_for_contig(prot_ids, @refgenome.coding_seq)

        unannotated = cumulate_annotation_stats_reference(contig, @contig_annotations[contig])
        @contig_foreign_cds[contig] = unannotated unless unannotated.empty?

      end

      # dump foreign proteins to file
      dump_cds

    end

  # Finishing annotation for foreign proteins
  finish_annotation(foreign_cds_file)

  # Parse annotations to genbank files
  parsing_genbank_files

  puts "\nPrinting Statistics.."
  print_stats("#{@outdir}/Annotation-Stats.txt")

end # end of method
126
+
127
+
128
# Finish the annotation of the remaining CDS.
#
# remaining_cds_file - path of the protein fasta file holding the CDS that
#                      still need an annotation.
#
# Uses the external database when options[:external_db] is set, otherwise the
# remote NCBI database when options[:remote_db] is set; does nothing when
# neither is present. Results are stored in @contig_annotations.
def finish_annotation remaining_cds_file

  if @options.has_key? :external_db # from an external DB
    annotate_with_external_db remaining_cds_file
  elsif @options.has_key? :remote_db # from a remote DB
    annotate_with_remote_db remaining_cds_file
  end

end # end of method

# Annotate the remaining CDS with a BLAT alignment on the external protein database.
def annotate_with_external_db remaining_cds_file

  db_file = @options[:external_db]
  ref_cds = extract_externaldb_prot_info db_file

  externaldb_synteny = SyntenyManip.new(remaining_cds_file, db_file, "Prot-ExternalDB", @pidentity)
  puts "\nRunning BLAT alignment with External Database.."
  externaldb_synteny.run_blat @root, @outdir
  externaldb_synteny.extract_hits :externaldb

  externaldb_synteny.aln_hits.each do |prot_id, aln|
    hit_gi = aln[:hits][0]
    hit_info = ref_cds[hit_gi]
    # The hit ID may be absent from the table built from the fasta headers;
    # leave that CDS unannotated instead of failing on nil.
    next if hit_info.nil?
    store_foreign_annotation prot_id, hit_gi, hit_info[:product], hit_info[:org]
  end

end # end of method

# Annotate the remaining CDS with a remote NCBI BLAST, one chunk file at a time.
def annotate_with_remote_db remaining_cds_file

  # do it by chunk to avoid NCBI CPU exceeding limit
  cds_files = split_remaining_cds_file remaining_cds_file
  @remotedb = @options[:remote_db]

  puts "\n# NCBI Blast on #{@remotedb}"

  cds_files.each do |cds_file|

    begin
      ncbiblast = RemoteNCBI.new(@remotedb,
                                 cds_file,
                                 "#{cds_file}.#{@remotedb}.xml",
                                 @pidentity)
    rescue
      # ncbi blast didn't work out for this chunk, carry on with the next one
      puts "Problem NCBI blast for foreign proteins"
      next
    end

    ncbiblast.extract_blast_results
    unless ncbiblast.aln_hits
      puts "Didn't produce the annotation for #{cds_file}"
      next
    end

    ncbiblast.aln_hits.each do |prot_id, aln|
      best_hit = aln[:hits][0]
      store_foreign_annotation prot_id, best_hit[:gi], best_hit[:product], best_hit[:org]
    end

  end

end # end of method

# Record the annotation of one protein under its contig in @contig_annotations.
# The contig name is the protein ID without its last "_"-separated field.
def store_foreign_annotation prot_id, hit_gi, product, org

  contig_of_protein = prot_id.split("_")[0..-2].join("_")

  note = "correspond to gi:#{hit_gi}"
  note += " from #{org}" unless org.to_s.empty?

  @contig_annotations[contig_of_protein] ||= {}
  @contig_annotations[contig_of_protein][prot_id] = {product: product,
                                                     gene: nil,
                                                     locustag: nil,
                                                     note: note}

end # end of method

private :annotate_with_external_db, :annotate_with_remote_db, :store_foreign_annotation
220
+
221
+
222
# Write the collected annotations into the genbank file of each contig
def parsing_genbank_files

  puts "\nParsing annotation into genbank files.."
  @contig_annotations.each_pair do |contig, annotations|
    gbk_dir = @fasta.prodigal_files[:gbk_path]
    contig_gbk = GenbankManip.new("#{gbk_dir}/#{contig}.gbk", "#{gbk_dir}")
    contig_gbk.add_annotation(annotations, gbk_dir, 0)
  end

end # end of method
233
+
234
+
235
# Cumulate the statistics of the reference (synteny) annotation for one contig.
#
# contig           - contig name
# contig_prots_ann - Hash of protein ID => annotation, nil for a protein
#                    without annotation (a nil hash is treated as empty)
#
# Updates @annotation_stats (:total_cds, :annotated_cds and the
# :synteny_contigs / :foreign_contigs lists).
#
# return : array of the unannotated protein IDs
def cumulate_annotation_stats_reference contig, contig_prots_ann

  contig_prots = @fasta.prodigal_files[:prot_ids_by_contig][contig]
  @annotation_stats[:total_cds] += contig_prots.length if contig_prots

  annotated, unannotated = (contig_prots_ann || {}).partition { |_, ann| !ann.nil? }
  @annotation_stats[:annotated_cds] += annotated.length
  remaining_cds = unannotated.map { |prot_id, _| prot_id }

  # A contig only counts as annotated when at least one of its CDS received
  # an annotation; a hash holding nothing but nil values is still foreign.
  if annotated.empty?
    @annotation_stats[:foreign_contigs] << contig
  else
    @annotation_stats[:synteny_contigs] << contig
  end

  remaining_cds
end # end of method
260
+
261
+
262
# Print the annotation statistics to a file.
#
# file - path of the report to write (overwritten if it exists)
#
# Percentages are rounded to 2 decimals; a percentage over an empty total is
# reported as 0.0 instead of NaN.
def print_stats file

  nb_short = @annotation_stats[:short_contigs].length
  nb_foreign = @annotation_stats[:foreign_contigs].length
  nb_synteny = @annotation_stats[:synteny_contigs].length
  total_nb_contigs = nb_foreign + nb_synteny + nb_short

  nb_annotated_cds = @annotation_stats[:annotated_cds]
  nb_total_cds = @annotation_stats[:total_cds]

  # guards against 0/0, which happens e.g. when no reference genome was used
  percent = lambda do |part, total|
    total.zero? ? 0.0 : (part.to_f / total.to_f * 100).round(2)
  end

  File.open(file, "w") do |fopen|
    fopen.write("#Contigs annotation based on reference genomes\n")
    fopen.write("Short Contigs (< #{@minlength}) :\t\t#{nb_short}\n")
    fopen.write("Foreign Contigs :\t\t#{nb_foreign}\n")
    fopen.write("Annotated Contigs :\t\t#{nb_synteny}\n")
    fopen.write("Total Contigs :\t\t\t#{total_nb_contigs}\n")
    fopen.write("% Contigs annotated :\t\t#{percent.call(nb_synteny, total_nb_contigs)}\n")
    fopen.write("\n")

    fopen.write("#CDS annotations based on reference genomes\n")
    fopen.write("Annotated CDS :\t\t\t#{nb_annotated_cds}\n")
    fopen.write("Total CDS :\t\t\t#{nb_total_cds}\n")
    fopen.write("% CDS annotated :\t\t#{percent.call(nb_annotated_cds, nb_total_cds)}\n")
    fopen.write("\n")
  end

end # end of method
289
+
290
+
291
# Dump the foreign (still unannotated) CDS to a fasta file for the alignment.
#
# Copies from the Prodigal protein file every record whose ID (first word of
# the header, without ">") is listed in @contig_foreign_cds.
#
# return : path of the written file, "<outdir>/Proteins-foreign.fa"
def dump_cds

  outfile = "#{@outdir}/Proteins-foreign.fa"

  # Hash used as a set: constant-time lookup for every fasta header
  foreign_ids = {}
  @contig_foreign_cds.each_value do |prot_ids|
    prot_ids.each { |prot_id| foreign_ids[prot_id] = true }
  end

  # block form: the output file is closed even if reading fails
  File.open(outfile, "w") do |fout|
    inprot = false
    File.foreach(@fasta.prodigal_files[:proteins]) do |l|
      if l[0] == ">"
        prot_id = l.chomp.split(" ")[0].delete(">")
        inprot = foreign_ids.key?(prot_id)
      end
      fout.write(l) if inprot
    end
  end

  outfile

end # end of method
318
+
319
+
320
# Extract the information on the proteins of an external database.
#
# db - path of a protein fasta file whose headers look like:
#   NCBI
#   >gi|103485499|ref|YP_615060.1| chromosomal replication initiation protein [Sphingopyxis alaskensis RB2256]
#   Swissprot
#   >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae GN=blaNDM-1 PE=1 SV=1
#   TrEMBL
#   >tr|E5KIY2|E5KIY2_ECOLX Beta-lactamase NDM-1 OS=Escherichia coli GN=blaNDM-1 PE=1 SV=1
#
# return : Hash of second "|" field of the header => {product:, org:}
#          (org is "" when the header names no organism)
#
# NOTE(review): for sp/tr headers the entry name (e.g. BLAN1_KLEPN) stays at
# the start of the product - confirm whether it should be stripped.
def extract_externaldb_prot_info db

  ref_cds = {}

  File.foreach(db) do |l|

    next unless l[0] == ">"

    lA = l.chomp.split("|")
    key_gi = lA[1]
    product_long = lA[-1]

    organism = ""
    product = ""

    if product_long.include?(" [") && product_long.include?("]") # NCBI
      organism = product_long[/\[.*?\]/].to_s
      product = product_long.split(" [")[0].to_s
    elsif product_long.include? "OS=" # Swissprot / TrEMBL
      product, after_os = product_long.split("OS=", 2)
      # the organism runs up to the next XX= tag (GN=, PE=, SV=..)
      organism = after_os.split(/[A-Z][A-Z]=/)[0].to_s
    elsif product_long =~ /[A-Z][A-Z]=/ # XX= tags but no organism
      product = product_long.split(/[A-Z][A-Z]=/)[0].to_s
    else # free text only
      product = product_long
    end

    org = organism.gsub("[", "").gsub("]", "").strip
    ref_cds[key_gi] = {product: product.strip, org: org}

  end # end of file reading

  ref_cds

end # end of method
367
+
368
+
369
# Split a fasta file into several fasta files.
#
# file       - path of the fasta file to split
# chunk_size - maximum number of sequences per output file [default=20]
#
# The chunks are written as "<outdir>/Protein-foreign.split/ProtForeign.<n>.fa".
# An input without any sequence produces no file at all.
#
# return : array of the chunk file paths, in order
def split_remaining_cds_file file, chunk_size = 20

  cds_files = []
  outdir = "#{@outdir}/Protein-foreign.split"

  Dir.mkdir(outdir) unless Dir.exist? outdir

  fout = nil
  nb_in_chunk = 0

  begin
    File.foreach(file) do |l|
      if l[0] == ">"
        # start a new chunk on the first sequence and whenever the current one is full
        if fout.nil? || nb_in_chunk >= chunk_size
          fout.close if fout
          chunk_path = "#{outdir}/ProtForeign.#{cds_files.length}.fa"
          fout = File.open(chunk_path, "w")
          cds_files << chunk_path
          nb_in_chunk = 0
        end
        nb_in_chunk += 1
      end
      fout.write(l) if fout
    end
  ensure
    # the last chunk is closed even if reading fails
    fout.close if fout
  end

  cds_files

end # end of method
403
+
404
+ private :dump_cds, :split_remaining_cds_file
405
+
406
+ end # end of class