bacterial-annotator 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0228aafd97af13b8756df42db362e4a53a30f4f0
4
- data.tar.gz: 858942624597354dd0f52ad98ea1e94373289174
3
+ metadata.gz: 10f3d2469fb3aaf64e6b84076e05ab9e1ae41cd6
4
+ data.tar.gz: f08a5465ce584dd888074c7d0146c1450386598e
5
5
  SHA512:
6
- metadata.gz: 82c36c4fba00b437e721991c739517b7cfeb5edaa7e1ac49849e59d3ffac2165f1ef39f9961aa756ff8ad691fec36a8b3424cf8ce4d0e1125d486fa2e2a38593
7
- data.tar.gz: e8b569f61f2dcb7309c6587ce619f7432e2588a717f3017053adcb693327ac2f21850785883eae4477226d057dde18a308a88d85d2a0558561d567865d1348cc
6
+ metadata.gz: bd006cf021f0a74f1e98fa6367ca4aca0abb36004f375654ec552b68e1ac8ebc5c1f65e38a480473551848d25ac6be904c5d1841cc60657a47384169d368a18c
7
+ data.tar.gz: b5a8cb5c74c028e813bbc585e70b6dcb420b8c8f4ad659e8e4c985bce868009a7f5d6015c4e396768a6146c918943e4586a638c084d844cc91be6ac927c993b6
@@ -63,27 +63,28 @@ def usage_annotate
63
63
  annotate [OPTIONS]
64
64
 
65
65
  // IO
66
- --input/-i <fasta_file> Provide the fasta file to annotate
67
- --outdir/-o <outdir> Output directory [default=BAnnotation]
68
- --force/-f Force to overwrite the output directory
69
- --name/-n <name> Sample name
66
+ --input/-i <fasta_file> Provide the fasta file to annotate
67
+ --outdir/-o <outdir> Output directory [default=BAnnotation]
68
+ --name/-n <name> Sample name
69
+ --force/-f Force to overwrite the output directory
70
70
 
71
71
  // MERGEM-based Annotation (Recommended)
72
- --db/-d <directory> MERGEM database directory
72
+ --db/-d <species_dir> From MERGEM database (include CDS and RNAs fasta)
73
+ // see bacteriapps.genome.ulaval.ca/mergem
73
74
 
74
75
  // Reference-Based Annotation
75
- --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
76
- --externaldb <proteins fasta_file>
77
- Finish or do a complete annotation with this sequence database (a protein fasta file).
78
- Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
79
- >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
80
- >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
81
- --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
82
- --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
83
- .. otherwise hint for a non-functional protein
84
- --minlength <length> Minimum contig length for annotation [default=500]
85
-
86
- --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
76
+ --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
77
+ --externaldb <fasta_file> Finish or do a complete annotation with this sequence database (protein fasta file).
78
+ Fasta headers need to look similar to NCBI or EBI fasta headers
79
+ EX: >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
80
+ >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
81
+
82
+ // Options
83
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
84
+ --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
85
+ // otherwise hint for a non-functional protein
86
+ --minlength <length> Minimum contig length for annotation [default=500]
87
+ --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
87
88
 
88
89
  OEM
89
90
 
@@ -101,6 +102,11 @@ def parseOptions_annotate
101
102
  options[:minlength] = 500
102
103
  options[:meta] = 0
103
104
 
105
+ if ARGV.length == 0
106
+ usage_annotate
107
+ abort
108
+ end
109
+
104
110
  while x = ARGV.shift
105
111
 
106
112
  case x.downcase
@@ -224,12 +230,14 @@ def usage_identify
224
230
 
225
231
  identify [OPTIONS] genome_1.fasta genome_2.fasta genome_x.fasta
226
232
 
227
- //MERGEM Database
228
- --db/-d <database directory>
233
+ //Mash Sketch
234
+ --mash/-m <mash sketch file>
229
235
 
230
236
  //IO
231
237
  --proc <nb of process> Number of process to run the comparison
232
238
 
239
+ --output [csv,tsv|json]
240
+
233
241
  OEM
234
242
 
235
243
  end
@@ -238,21 +246,24 @@ def parseOptions_identify
238
246
 
239
247
  options = {}
240
248
  options[:proc] = 2
241
- options[:genomes_list] = []
249
+ options[:genome_list] = []
250
+ options[:output] = "tsv"
242
251
 
243
252
  while x = ARGV.shift
244
253
 
245
254
  case x.downcase
246
- when "--db", "-d"
247
- options[:database] = ARGV.shift
255
+ when "--mash", "-m"
256
+ options[:mash_file] = ARGV.shift
248
257
  when "--proc", "-p"
249
258
  options[:proc] = ARGV.shift
259
+ when "--output", "-o"
260
+ options[:output] = ARGV.shift
250
261
  when "--help", "-h"
251
262
  usage_identify
252
263
  abort
253
264
  else
254
265
  if File.exists? "#{x}"
255
- options[:genomes_list] << x
266
+ options[:genome_list] << x
256
267
  else
257
268
  puts "#{x} file doesn't exist"
258
269
  usage_identify
@@ -302,14 +313,14 @@ if ARGV.size >= 1
302
313
 
303
314
  # Check Options
304
315
  if ! options.has_key? :refgenome and
305
- ! options.has_key? :external_db
316
+ ! options.has_key? :external_db and
317
+ ! options.has_key? :mergem
306
318
  puts "You didn't provide a reference genome or a database for the annotation !"
307
319
  elsif ! options.has_key? :input
308
320
  puts "You didn't provide a fasta file to annotate !"
309
321
  end
310
322
 
311
323
  bannot = BacterialAnnotator.new(options, ROOT)
312
- bannot.prepare_files_for_annotation
313
324
  bannot.run_annotation
314
325
 
315
326
  elsif ARGV[0] == "compare"
@@ -317,20 +328,19 @@ if ARGV.size >= 1
317
328
  ARGV.shift
318
329
  options = parseOptions_compare
319
330
  bcomp = BacterialComparator.new(options, ROOT)
320
- aln_opt = options[:align].downcase
321
- bcomp.mafft_aln aln_opt
322
- bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
331
+ bcomp.run_comparison
323
332
 
324
333
  elsif ARGV[0] == "identify"
325
334
 
326
335
  ARGV.shift
327
336
  options = parseOptions_identify
328
- if options[:genomes_list].empty?
337
+ if options[:genome_list].empty?
329
338
  puts "You need at least 1 genome fasta to identify !!"
330
339
  usage_identify
331
340
  abort
332
341
  end
333
342
  bident = BacterialIdentificator.new(options, ROOT)
343
+ bident.run_identification
334
344
 
335
345
  elsif ARGV[0] == "--version" or ARGV[0] == "-v"
336
346
 
@@ -5,27 +5,208 @@
5
5
  # version: 0.0.1
6
6
  # licence:
7
7
 
8
+ require 'json'
9
+ require 'zlib'
8
10
 
9
11
 
10
12
  class SequenceAnnotation
11
13
 
12
- attr_accessor :gbk, :coding_seq, :cds_file, :rna_file
14
+ attr_accessor :gbk, :coding_seq, :rna_seq, :cds_file, :rna_file
13
15
 
14
16
  # Initialize the genbank file
15
- def initialize gbk_file, outdir
17
+ def initialize root, outdir, file_ref, type
18
+
19
+ @root = root
20
+ @outdir = outdir
21
+ @coding_seq = {}
22
+ @rna_seq = {}
23
+
24
+ case type
25
+ when "refGbk"
26
+ # reference genome used for annotation
27
+ reference_gbk file_ref
28
+ when "db"
29
+ # reference database used for annotation
30
+ reference_db file_ref
31
+ when "fasta"
32
+ # single fasta database for annotation (completion)
33
+ single_fasta file_ref
34
+ when "newGbk"
35
+ # new genbank holder to be annotated
36
+ new_gbk file_ref
37
+ end
38
+
39
+ end
40
+
41
+
42
+ # Use a MERGEM database to get annotation from it
43
+ def reference_db dir
44
+
45
+ abort "Aborting: Can't find MERGEM db direcotry" if ! File.exists? dir
46
+
47
+ @cds_file = "#{dir}/cds.dmnd"
48
+ @rna_file = "#{dir}/rnas.fasta"
49
+
50
+ json_genes = {}
51
+ Zlib::GzipReader.open("#{dir}/cds.json.gz") {|gz|
52
+ json_genes = JSON.parse(gz.read)
53
+ }
54
+
55
+ json_genes.each do |gene|
16
56
 
17
- @gbk_file = gbk_file
18
- if ! File.exists? @gbk_file
19
- fetch_ncbi_genome(@gbk_file, outdir)
20
- @gbk_file = "#{outdir}/#{gbk_file}.gbk"
21
- # @gbk_file += ".gbk"
57
+ prot_id = gene["cluster_id"]
58
+ @coding_seq[prot_id] = {
59
+ protId: prot_id,
60
+ location: nil,
61
+ product: gene["consensus_name"],
62
+ length: gene["consensus_length"]
63
+ }
64
+
65
+ end
66
+
67
+ # File.open("#{dir}/cds.txt") do |f|
68
+ # while l = f.gets
69
+ # lA = l.chomp.split(" ")
70
+ # @coding_seq[lA[0].gsub(">","")] = {
71
+ # protId: lA[0].gsub(">",""),
72
+ # location: nil,
73
+ # product: lA[1..-1].join(' '),
74
+ # }
75
+ # end
76
+ # end
77
+
78
+ File.open("#{dir}/rnas.txt") do |f|
79
+ while l = f.gets
80
+ lA = l.chomp.split(" ")
81
+ @rna_seq[lA[0].gsub(">","")] = {
82
+ protId: lA[0].gsub(">",""),
83
+ location: nil,
84
+ product: lA[1..-1].join(' '),
85
+ }
86
+ end
22
87
  end
23
88
 
24
- flat_gbk = Bio::FlatFile.auto(@gbk_file)
89
+ end
90
+
91
+ # Use a Genbank Reference and read annotation from it
92
+ def reference_gbk gbk_file
93
+
94
+ puts "# Preparing reference genome files.."
95
+ if ! File.exists? gbk_file
96
+ fetch_ncbi_genome(gbk_file)
97
+ gbk_file = "#{@outdir}/#{gbk_file}.gbk"
98
+ # gbk_file += ".gbk"
99
+ end
100
+
101
+ flat_gbk = Bio::FlatFile.auto(gbk_file)
25
102
 
26
103
  # Check if gbk is valid
27
104
  if flat_gbk.dbclass != Bio::GenBank
28
- abort "Aborting : The input #{@gbk_file} is not a valid genbank file !"
105
+ abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
106
+ else
107
+ @gbk = flat_gbk.next_entry
108
+ end
109
+
110
+ @bioseq = @gbk.to_biosequence
111
+
112
+ write_cds_to_file
113
+ write_rna_to_file
114
+
115
+ end
116
+
117
+ # Use a single fasta file and read annotation from its headers
118
+ def single_fasta fasta_file
119
+
120
+ return "" if ! File.exists? fasta_file
121
+
122
+ File.open(fasta_file, "r") do |dbfile|
123
+
124
+ while l=dbfile.gets
125
+
126
+ if l[0] == ">"
127
+
128
+ lA = l.chomp.split("|")
129
+
130
+ if lA.length > 1 # refseq, ncbi, trembl, swissprot
131
+
132
+ key_gi = l.split(" ")[0][1..-1]
133
+ product_long = lA[-1]
134
+
135
+ organism = ""
136
+ product = ""
137
+ db_source = "[DBSource]"
138
+
139
+ if product_long.scan(/|/).count >= 5 # FROM BIORUBY SCRIPTS
140
+ product = product_long
141
+ db_source = "RefSeq"
142
+ elsif product_long.include? " [" and product_long.include? "]" # NCBI
143
+ organism = product_long[/\[.*?\]/]
144
+ product = product_long.split(" [")[0].strip
145
+ elsif product_long.include? "OS=" # Swissprot / TrEMBL
146
+ product_tmp = product.split("OS=")
147
+ organism = product_tmp[1].split(/[A-Z][A-Z]=/)[0].strip
148
+ product = product_tmp[0].strip
149
+ elsif product_long.include? "[A-Z][A-Z]=" # NCBI
150
+ product = product_long.split(/[A-Z][A-Z]=/)[0].strip
151
+ else
152
+ product = product_long
153
+ end
154
+
155
+ org = organism.gsub("[","").gsub("]","")
156
+
157
+ product.lstrip!
158
+ prot_id = nil
159
+
160
+ if key_gi.count("|") == 4
161
+ if lA[2] == "ref"
162
+ db_source = "RefSeq"
163
+ end
164
+ prot_id = lA[3]
165
+ elsif key_gi.count("|") == 2
166
+ if lA[0].include? == "sp" or
167
+ lA[0].include? == "tr"
168
+ db_source = "UniProtKB"
169
+ end
170
+ prot_id = lA[1]
171
+ elsif key_gi.count("|") == 5
172
+ db_source = "RefSeq"
173
+ prot_id = lA[2]
174
+ end
175
+
176
+
177
+ else # mergem
178
+
179
+
180
+ end
181
+
182
+ @coding_seq[key_gi] = { product: product,
183
+ org: org,
184
+ prot_id: prot_id,
185
+ db_source: db_source }
186
+
187
+ end
188
+
189
+ end
190
+
191
+ end
192
+
193
+ end
194
+
195
+
196
+ # New Genbank Holder to add annotation to it
197
+ def new_gbk gbk_file
198
+
199
+ if ! File.exists? gbk_file
200
+ fetch_ncbi_genome(gbk_file)
201
+ gbk_file = "#{@outdir}/#{gbk_file}.gbk"
202
+ # gbk_file += ".gbk"
203
+ end
204
+
205
+ flat_gbk = Bio::FlatFile.auto(gbk_file)
206
+
207
+ # Check if gbk is valid
208
+ if flat_gbk.dbclass != Bio::GenBank
209
+ abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
29
210
  else
30
211
  @gbk = flat_gbk.next_entry
31
212
  end
@@ -38,9 +219,7 @@ class SequenceAnnotation
38
219
  # Prepare CDS/proteins
39
220
  def get_cds
40
221
 
41
- if @coding_seq == nil
42
-
43
- @coding_seq = {}
222
+ if @coding_seq.empty?
44
223
 
45
224
  # Iterate over each CDS
46
225
  @gbk.each_cds do |ft|
@@ -74,7 +253,7 @@ class SequenceAnnotation
74
253
  product: product[0],
75
254
  bioseq: pepBioSeq,
76
255
  bioseq_gene: dnaBioSeq,
77
- bioseq_len: pepBioSeq.length
256
+ length: pepBioSeq.length
78
257
  }
79
258
 
80
259
  end
@@ -88,12 +267,12 @@ class SequenceAnnotation
88
267
  # Prepare rRNA tRNA
89
268
  def get_rna
90
269
 
91
- if @rna_seq == nil
270
+ if @rna_seq.empty?
92
271
 
93
272
  @rna_seq = {}
94
273
  @gbk.features do |ft|
95
274
 
96
- next if ! ft.feature.to_s.include? "RNA"
275
+ next if ! ft.feature.to_s.include? "rRNA"
97
276
 
98
277
  ftH = ft.to_hash
99
278
  loc = ft.locations
@@ -129,20 +308,19 @@ class SequenceAnnotation
129
308
 
130
309
  end
131
310
 
132
-
133
311
  # Print CDS to files
134
312
  # RETURN : cds_file path
135
- def write_cds_to_file outdir
313
+ def write_cds_to_file
136
314
 
137
315
  cds_file = "#{@gbk.accession}.pep"
138
316
  dna_file = "#{@gbk.accession}.dna"
139
317
 
140
- if @coding_seq == nil
318
+ if @coding_seq.empty?
141
319
  get_cds
142
320
  end
143
321
 
144
- dna_out = File.open("#{outdir}/#{dna_file}", "w")
145
- File.open("#{outdir}/#{cds_file}", "w") do |fwrite|
322
+ dna_out = File.open("#{@outdir}/#{dna_file}", "w")
323
+ File.open("#{@outdir}/#{cds_file}", "w") do |fwrite|
146
324
  @coding_seq.each_key do |k|
147
325
  seqout = @coding_seq[k][:bioseq].output_fasta("#{k}",60)
148
326
  seqout_dna = @coding_seq[k][:bioseq_gene].output_fasta("#{k}",60)
@@ -152,28 +330,28 @@ class SequenceAnnotation
152
330
  end
153
331
  dna_out.close
154
332
 
155
- @cds_file = "#{outdir}/" + cds_file
333
+ @cds_file = "#{@outdir}/" + cds_file
156
334
 
157
335
  end
158
336
 
159
337
  # Print RNA to files
160
338
  # RETURN : rna_file path
161
- def write_rna_to_file outdir
339
+ def write_rna_to_file
162
340
 
163
341
  rna_file = "#{@gbk.accession}.rna"
164
342
 
165
- if @rna_seq == nil
343
+ if @rna_seq.empty?
166
344
  get_rna
167
345
  end
168
346
 
169
- File.open("#{outdir}/#{rna_file}", "w") do |fwrite|
347
+ File.open("#{@outdir}/#{rna_file}", "w") do |fwrite|
170
348
  @rna_seq.each_key do |k|
171
349
  seqout_dna = @rna_seq[k][:bioseq_gene].output_fasta("#{k}|#{@rna_seq[k][:type]}|#{@rna_seq[k][:product]}",60)
172
350
  fwrite.write(seqout_dna)
173
351
  end
174
352
  end
175
353
 
176
- @rna_file = "#{outdir}/" + rna_file
354
+ @rna_file = "#{@outdir}/" + rna_file
177
355
 
178
356
  end
179
357
 
@@ -247,6 +425,7 @@ class SequenceAnnotation
247
425
 
248
426
  # check if there is a reference genome.. reference_locus shouldn't be nil in that case
249
427
  if locus != nil
428
+
250
429
  qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (AA identity: #{pId}%; coverage(q,s): #{cov_query}%,#{cov_subject}%) from #{ref_genome}")
251
430
  ftArray.push(qNote)
252
431
 
@@ -390,9 +569,9 @@ class SequenceAnnotation
390
569
  end
391
570
 
392
571
 
393
- def save_genbank_to_file outdir
572
+ def save_genbank_to_file
394
573
 
395
- File.open("#{outdir}/#{@gbk.definition}.gbk", "w") do |f|
574
+ File.open("#{@outdir}/#{@gbk.definition}.gbk", "w") do |f|
396
575
  f.write(@gbk.to_biosequence.output(:genbank))
397
576
  end
398
577
 
@@ -403,7 +582,7 @@ class SequenceAnnotation
403
582
  ###################
404
583
 
405
584
  # Fct: Get dna sequence
406
- def get_DNA (cds, seq)
585
+ def get_DNA cds, seq
407
586
  loc = cds.locations
408
587
  sbeg = loc[0].from.to_i
409
588
  send = loc[0].to.to_i
@@ -418,11 +597,11 @@ class SequenceAnnotation
418
597
 
419
598
 
420
599
  # Fetch genbank genome from NCBI
421
- def fetch_ncbi_genome refgenome_id, outdir
600
+ def fetch_ncbi_genome refgenome_id
422
601
  Bio::NCBI.default_email = 'default@default.com'
423
602
  ncbi = Bio::NCBI::REST.new
424
603
  genbankstring = ncbi.efetch(refgenome_id, {"db"=>'nucleotide', "rettype"=>'gb'})
425
- File.open("#{outdir}/#{refgenome_id}.gbk", "w") do |f|
604
+ File.open("#{@outdir}/#{refgenome_id}.gbk", "w") do |f|
426
605
  f.write(genbankstring)
427
606
  end
428
607
  end
@@ -13,8 +13,10 @@ class SequenceFasta
13
13
  attr_reader :fasta_flat, :fasta_file, :annotation_files
14
14
 
15
15
  # Initialize fasta holder
16
- def initialize fasta_file, meta
16
+ def initialize root, outdir, fasta_file, meta
17
17
 
18
+ @root = root
19
+ @outdir = outdir
18
20
  @fasta_file = fasta_file
19
21
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
20
22
 
@@ -32,29 +34,29 @@ class SequenceFasta
32
34
 
33
35
 
34
36
  # Run prodigal on the genome to annotate
35
- def run_prodigal root, outdir
37
+ def run_prodigal
36
38
 
37
39
  @annotation_files = {}
38
- Dir.mkdir "#{outdir}" if ! Dir.exists? "#{outdir}"
40
+ Dir.mkdir "#{@outdir}" if ! Dir.exists? "#{@outdir}"
39
41
  if @meta==1
40
- system("#{root}/prodigal.linux -p meta -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
42
+ system("#{@root}/prodigal.linux -p meta -i #{@fasta_file} -a #{@outdir}/Proteins.fa -d #{@outdir}/Genes.fa -o #{@outdir}/Genbanks.gbk -q")
41
43
  else
42
- system("#{root}/prodigal.linux -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
44
+ system("#{@root}/prodigal.linux -i #{@fasta_file} -a #{@outdir}/Proteins.fa -d #{@outdir}/Genes.fa -o #{@outdir}/Genbanks.gbk -q")
43
45
  end
44
46
 
45
47
  @annotation_files = {
46
- multiGBK: "#{outdir}/Genbanks.gbk",
48
+ multiGBK: "#{@outdir}/Genbanks.gbk",
47
49
  contigs: [],
48
50
  contigs_length: [],
49
- genes: "#{outdir}/Genes.fa",
50
- proteins: "#{outdir}/Proteins.fa",
51
+ genes: "#{@outdir}/Genes.fa",
52
+ proteins: "#{@outdir}/Proteins.fa",
51
53
  prot_ids_by_contig: {},
52
- fasta_path: "#{outdir}/single-fasta/",
53
- gbk_path: "#{outdir}/single-genbank/"
54
+ fasta_path: "#{@outdir}/single-fasta/",
55
+ gbk_path: "#{@outdir}/single-genbank/"
54
56
  }
55
57
 
56
- split_fasta outdir
57
- split_genbank outdir, "#{outdir}/Genbanks.gbk"
58
+ split_fasta
59
+ split_genbank
58
60
  extract_cds_names
59
61
  @annotation_files
60
62
 
@@ -63,14 +65,14 @@ class SequenceFasta
63
65
 
64
66
  # Split Multi Fasta file
65
67
  # RETURN : array of fasta files
66
- def split_fasta outdir
68
+ def split_fasta
67
69
  @single_fasta = {}
68
- Dir.mkdir("#{outdir}/single-fasta") if ! Dir.exists?("#{outdir}/single-fasta")
70
+ Dir.mkdir("#{@outdir}/single-fasta") if ! Dir.exists?("#{@outdir}/single-fasta")
69
71
  @fasta_flat.each_entry do |seq|
70
72
  file_name = seq.definition.chomp.split(" ")[0]
71
73
  @annotation_files[:contigs] << "#{file_name}"
72
74
  @annotation_files[:contigs_length] << seq.seq.length
73
- File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
75
+ File.open("#{@outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
74
76
  fwrite.write(seq)
75
77
  end
76
78
  @single_fasta[file_name] = seq
@@ -80,9 +82,10 @@ class SequenceFasta
80
82
 
81
83
  # Split Multi Genbanks file
82
84
  # RETURN : array of genbank files
83
- def split_genbank outdir, multigbk
85
+ def split_genbank
84
86
 
85
- Dir.mkdir("#{outdir}/single-genbank")if ! Dir.exists?("#{outdir}/single-genbank")
87
+ multigbk = "#{@outdir}/Genbanks.gbk"
88
+ Dir.mkdir("#{@outdir}/single-genbank")if ! Dir.exists?("#{@outdir}/single-genbank")
86
89
  File.open(multigbk,"r") do |f|
87
90
  fopen = nil
88
91
  while l = f.gets
@@ -96,7 +99,7 @@ class SequenceFasta
96
99
  year = date.year
97
100
  locus = "LOCUS #{file_name}#{spacer}#{seq_length.to_s} bp DNA linear BCT #{day}-#{month}-#{year}\n"
98
101
  locus += "DEFINITION #{file_name}\n"
99
- fopen = File.open("#{outdir}/single-genbank/#{file_name}.gbk", "w")
102
+ fopen = File.open("#{@outdir}/single-genbank/#{file_name}.gbk", "w")
100
103
  fopen.write(locus)
101
104
  elsif l[0..1] == "//"
102
105
  fopen.write(outseq)