bacterial-annotator 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0228aafd97af13b8756df42db362e4a53a30f4f0
4
- data.tar.gz: 858942624597354dd0f52ad98ea1e94373289174
3
+ metadata.gz: 10f3d2469fb3aaf64e6b84076e05ab9e1ae41cd6
4
+ data.tar.gz: f08a5465ce584dd888074c7d0146c1450386598e
5
5
  SHA512:
6
- metadata.gz: 82c36c4fba00b437e721991c739517b7cfeb5edaa7e1ac49849e59d3ffac2165f1ef39f9961aa756ff8ad691fec36a8b3424cf8ce4d0e1125d486fa2e2a38593
7
- data.tar.gz: e8b569f61f2dcb7309c6587ce619f7432e2588a717f3017053adcb693327ac2f21850785883eae4477226d057dde18a308a88d85d2a0558561d567865d1348cc
6
+ metadata.gz: bd006cf021f0a74f1e98fa6367ca4aca0abb36004f375654ec552b68e1ac8ebc5c1f65e38a480473551848d25ac6be904c5d1841cc60657a47384169d368a18c
7
+ data.tar.gz: b5a8cb5c74c028e813bbc585e70b6dcb420b8c8f4ad659e8e4c985bce868009a7f5d6015c4e396768a6146c918943e4586a638c084d844cc91be6ac927c993b6
@@ -63,27 +63,28 @@ def usage_annotate
63
63
  annotate [OPTIONS]
64
64
 
65
65
  // IO
66
- --input/-i <fasta_file> Provide the fasta file to annotate
67
- --outdir/-o <outdir> Output directory [default=BAnnotation]
68
- --force/-f Force to overwrite the output directory
69
- --name/-n <name> Sample name
66
+ --input/-i <fasta_file> Provide the fasta file to annotate
67
+ --outdir/-o <outdir> Output directory [default=BAnnotation]
68
+ --name/-n <name> Sample name
69
+ --force/-f Force to overwrite the output directory
70
70
 
71
71
  // MERGEM-based Annotation (Recommended)
72
- --db/-d <directory> MERGEM database directory
72
+ --db/-d <species_dir> From MERGEM database (include CDS and RNAs fasta)
73
+ // see bacteriapps.genome.ulaval.ca/mergem
73
74
 
74
75
  // Reference-Based Annotation
75
- --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
76
- --externaldb <proteins fasta_file>
77
- Finish or do a complete annotation with this sequence database (a protein fasta file).
78
- Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
79
- >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
80
- >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
81
- --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
82
- --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
83
- .. otherwise hint for a non-functional protein
84
- --minlength <length> Minimum contig length for annotation [default=500]
85
-
86
- --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
76
+ --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
77
+ --externaldb <fasta_file> Finish or do a complete annotation with this sequence database (protein fasta file).
78
+ Fasta headers need to look similar to NCBI or EBI fasta headers
79
+ EX: >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
80
+ >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
81
+
82
+ // Options
83
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
84
+ --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
85
+ // otherwise hint for a non-functional protein
86
+ --minlength <length> Minimum contig length for annotation [default=500]
87
+ --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
87
88
 
88
89
  OEM
89
90
 
@@ -101,6 +102,11 @@ def parseOptions_annotate
101
102
  options[:minlength] = 500
102
103
  options[:meta] = 0
103
104
 
105
+ if ARGV.length == 0
106
+ usage_annotate
107
+ abort
108
+ end
109
+
104
110
  while x = ARGV.shift
105
111
 
106
112
  case x.downcase
@@ -224,12 +230,14 @@ def usage_identify
224
230
 
225
231
  identify [OPTIONS] genome_1.fasta genome_2.fasta genome_x.fasta
226
232
 
227
- //MERGEM Database
228
- --db/-d <database directory>
233
+ //Mash Sketch
234
+ --mash/-m <mash sketch file>
229
235
 
230
236
  //IO
231
237
  --proc <nb of process> Number of process to run the comparison
232
238
 
239
+ --output [csv,tsv|json]
240
+
233
241
  OEM
234
242
 
235
243
  end
@@ -238,21 +246,24 @@ def parseOptions_identify
238
246
 
239
247
  options = {}
240
248
  options[:proc] = 2
241
- options[:genomes_list] = []
249
+ options[:genome_list] = []
250
+ options[:output] = "tsv"
242
251
 
243
252
  while x = ARGV.shift
244
253
 
245
254
  case x.downcase
246
- when "--db", "-d"
247
- options[:database] = ARGV.shift
255
+ when "--mash", "-m"
256
+ options[:mash_file] = ARGV.shift
248
257
  when "--proc", "-p"
249
258
  options[:proc] = ARGV.shift
259
+ when "--output", "-o"
260
+ options[:output] = ARGV.shift
250
261
  when "--help", "-h"
251
262
  usage_identify
252
263
  abort
253
264
  else
254
265
  if File.exists? "#{x}"
255
- options[:genomes_list] << x
266
+ options[:genome_list] << x
256
267
  else
257
268
  puts "#{x} file doesn't exist"
258
269
  usage_identify
@@ -302,14 +313,14 @@ if ARGV.size >= 1
302
313
 
303
314
  # Check Options
304
315
  if ! options.has_key? :refgenome and
305
- ! options.has_key? :external_db
316
+ ! options.has_key? :external_db and
317
+ ! options.has_key? :mergem
306
318
  puts "You didn't provide a reference genome or a database for the annotation !"
307
319
  elsif ! options.has_key? :input
308
320
  puts "You didn't provide a fasta file to annotate !"
309
321
  end
310
322
 
311
323
  bannot = BacterialAnnotator.new(options, ROOT)
312
- bannot.prepare_files_for_annotation
313
324
  bannot.run_annotation
314
325
 
315
326
  elsif ARGV[0] == "compare"
@@ -317,20 +328,19 @@ if ARGV.size >= 1
317
328
  ARGV.shift
318
329
  options = parseOptions_compare
319
330
  bcomp = BacterialComparator.new(options, ROOT)
320
- aln_opt = options[:align].downcase
321
- bcomp.mafft_aln aln_opt
322
- bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
331
+ bcomp.run_comparison
323
332
 
324
333
  elsif ARGV[0] == "identify"
325
334
 
326
335
  ARGV.shift
327
336
  options = parseOptions_identify
328
- if options[:genomes_list].empty?
337
+ if options[:genome_list].empty?
329
338
  puts "You need at least 1 genome fasta to identify !!"
330
339
  usage_identify
331
340
  abort
332
341
  end
333
342
  bident = BacterialIdentificator.new(options, ROOT)
343
+ bident.run_identification
334
344
 
335
345
  elsif ARGV[0] == "--version" or ARGV[0] == "-v"
336
346
 
@@ -5,27 +5,208 @@
5
5
  # version: 0.0.1
6
6
  # licence:
7
7
 
8
+ require 'json'
9
+ require 'zlib'
8
10
 
9
11
 
10
12
  class SequenceAnnotation
11
13
 
12
- attr_accessor :gbk, :coding_seq, :cds_file, :rna_file
14
+ attr_accessor :gbk, :coding_seq, :rna_seq, :cds_file, :rna_file
13
15
 
14
16
  # Initialize the genbank file
15
- def initialize gbk_file, outdir
17
# Build an annotation holder and load it from the given source.
#
# root     - stored in @root
# outdir   - stored in @outdir
# file_ref - argument handed to the loader selected by +type+
# type     - one of:
#            "refGbk" -> reference_gbk (reference genome used for annotation)
#            "db"     -> reference_db  (reference database used for annotation)
#            "fasta"  -> single_fasta  (single fasta database, for completion)
#            "newGbk" -> new_gbk       (new genbank holder to be annotated)
#            Any other value runs no loader and leaves both tables empty.
def initialize root, outdir, file_ref, type

  @root, @outdir = root, outdir
  @coding_seq, @rna_seq = {}, {}

  loaders = {
    "refGbk" => :reference_gbk,
    "db"     => :reference_db,
    "fasta"  => :single_fasta,
    "newGbk" => :new_gbk
  }

  loader = loaders[type]
  send(loader, file_ref) if loader

end
40
+
41
+
42
+ # Use a MERGEM database to get annotation from it
43
+ def reference_db dir
44
+
45
+ abort "Aborting: Can't find MERGEM db direcotry" if ! File.exists? dir
46
+
47
+ @cds_file = "#{dir}/cds.dmnd"
48
+ @rna_file = "#{dir}/rnas.fasta"
49
+
50
+ json_genes = {}
51
+ Zlib::GzipReader.open("#{dir}/cds.json.gz") {|gz|
52
+ json_genes = JSON.parse(gz.read)
53
+ }
54
+
55
+ json_genes.each do |gene|
16
56
 
17
- @gbk_file = gbk_file
18
- if ! File.exists? @gbk_file
19
- fetch_ncbi_genome(@gbk_file, outdir)
20
- @gbk_file = "#{outdir}/#{gbk_file}.gbk"
21
- # @gbk_file += ".gbk"
57
+ prot_id = gene["cluster_id"]
58
+ @coding_seq[prot_id] = {
59
+ protId: prot_id,
60
+ location: nil,
61
+ product: gene["consensus_name"],
62
+ length: gene["consensus_length"]
63
+ }
64
+
65
+ end
66
+
67
+ # File.open("#{dir}/cds.txt") do |f|
68
+ # while l = f.gets
69
+ # lA = l.chomp.split(" ")
70
+ # @coding_seq[lA[0].gsub(">","")] = {
71
+ # protId: lA[0].gsub(">",""),
72
+ # location: nil,
73
+ # product: lA[1..-1].join(' '),
74
+ # }
75
+ # end
76
+ # end
77
+
78
+ File.open("#{dir}/rnas.txt") do |f|
79
+ while l = f.gets
80
+ lA = l.chomp.split(" ")
81
+ @rna_seq[lA[0].gsub(">","")] = {
82
+ protId: lA[0].gsub(">",""),
83
+ location: nil,
84
+ product: lA[1..-1].join(' '),
85
+ }
86
+ end
22
87
  end
23
88
 
24
- flat_gbk = Bio::FlatFile.auto(@gbk_file)
89
+ end
90
+
91
+ # Use a Genbank Reference and read annotation from it
92
+ def reference_gbk gbk_file
93
+
94
+ puts "# Preparing reference genome files.."
95
+ if ! File.exists? gbk_file
96
+ fetch_ncbi_genome(gbk_file)
97
+ gbk_file = "#{@outdir}/#{gbk_file}.gbk"
98
+ # gbk_file += ".gbk"
99
+ end
100
+
101
+ flat_gbk = Bio::FlatFile.auto(gbk_file)
25
102
 
26
103
  # Check if gbk is valid
27
104
  if flat_gbk.dbclass != Bio::GenBank
28
- abort "Aborting : The input #{@gbk_file} is not a valid genbank file !"
105
+ abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
106
+ else
107
+ @gbk = flat_gbk.next_entry
108
+ end
109
+
110
+ @bioseq = @gbk.to_biosequence
111
+
112
+ write_cds_to_file
113
+ write_rna_to_file
114
+
115
+ end
116
+
117
+ # Use a single protein fasta file (external database) and read annotation from its headers
118
# Load a single protein fasta file used as an external annotation database.
#
# Only header lines (first character ">") are read. Each header adds one entry
# to @coding_seq, keyed by the first whitespace-delimited token of the header
# without its leading ">", holding :product, :org, :prot_id and :db_source.
#
# fasta_file - path of the fasta file to read
#
# Returns "" when the file does not exist, nil otherwise.
def single_fasta fasta_file

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return "" unless File.exist? fasta_file

  File.foreach(fasta_file) do |line|
    next unless line[0] == ">"
    key, info = parse_fasta_header line.chomp
    @coding_seq[key] = info
  end

  nil

end

# Split one fasta header (already chomped, starting with ">") into the key and
# the annotation hash stored by single_fasta.
#
# Returns [key, { product:, org:, prot_id:, db_source: }].
def parse_fasta_header header

  fields = header.split("|")
  key = header.split(" ")[0][1..-1]

  product = ""
  organism = ""
  prot_id = nil
  db_source = "[DBSource]"

  if fields.length > 1 # refseq, ncbi, trembl, swissprot

    product_long = fields[-1]

    # NOTE(review): the original tested `product_long.scan(/|/).count >= 5`,
    # whose empty alternation matches at every position and so fired for almost
    # every header. Counting the pipes of the whole header is assumed to be the
    # intent (it agrees with the 5-pipe RefSeq case below) - confirm.
    if header.count("|") >= 5 # FROM BIORUBY SCRIPTS
      product = product_long
      db_source = "RefSeq"
    elsif product_long.include?(" [") && product_long.include?("]") # NCBI
      organism = product_long[/\[.*?\]/].to_s
      product = product_long.split(" [")[0].to_s.strip
    elsif product_long.include?("OS=") # Swissprot / TrEMBL
      # Split the full description (not the still-empty product) around "OS=".
      name, rest = product_long.split("OS=", 2)
      organism = rest.to_s.split(/[A-Z][A-Z]=/)[0].to_s.strip
      product = name.to_s.strip
    elsif product_long =~ /[A-Z][A-Z]=/ # other "XX=" tagged descriptions
      product = product_long.split(/[A-Z][A-Z]=/)[0].to_s.strip
    else
      product = product_long
    end

    product = product.lstrip

    # The accession position depends on how many pipes the key carries.
    case key.count("|")
    when 4 # >gi|<gi>|<db>|<accession>|
      db_source = "RefSeq" if fields[2] == "ref"
      prot_id = fields[3]
    when 2 # >sp|<accession>|<entry name> or >tr|...
      db_source = "UniProtKB" if [">sp", ">tr"].include?(fields[0])
      prot_id = fields[1]
    when 5
      db_source = "RefSeq"
      prot_id = fields[2]
    end

  else # mergem

    # Headers without "|" used to be stored under a nil (or stale) key.
    # Keep the identifier as key and the rest of the header as product.
    product = header.split(" ")[1..-1].join(" ")

  end

  [key, { product: product,
          org: organism.delete("[]"),
          prot_id: prot_id,
          db_source: db_source }]

end
194
+
195
+
196
+ # New Genbank Holder to add annotation to it
197
+ def new_gbk gbk_file
198
+
199
+ if ! File.exists? gbk_file
200
+ fetch_ncbi_genome(gbk_file)
201
+ gbk_file = "#{@outdir}/#{gbk_file}.gbk"
202
+ # gbk_file += ".gbk"
203
+ end
204
+
205
+ flat_gbk = Bio::FlatFile.auto(gbk_file)
206
+
207
+ # Check if gbk is valid
208
+ if flat_gbk.dbclass != Bio::GenBank
209
+ abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
29
210
  else
30
211
  @gbk = flat_gbk.next_entry
31
212
  end
@@ -38,9 +219,7 @@ class SequenceAnnotation
38
219
  # Prepare CDS/proteins
39
220
  def get_cds
40
221
 
41
- if @coding_seq == nil
42
-
43
- @coding_seq = {}
222
+ if @coding_seq.empty?
44
223
 
45
224
  # Iterate over each CDS
46
225
  @gbk.each_cds do |ft|
@@ -74,7 +253,7 @@ class SequenceAnnotation
74
253
  product: product[0],
75
254
  bioseq: pepBioSeq,
76
255
  bioseq_gene: dnaBioSeq,
77
- bioseq_len: pepBioSeq.length
256
+ length: pepBioSeq.length
78
257
  }
79
258
 
80
259
  end
@@ -88,12 +267,12 @@ class SequenceAnnotation
88
267
  # Prepare rRNA tRNA
89
268
  def get_rna
90
269
 
91
- if @rna_seq == nil
270
+ if @rna_seq.empty?
92
271
 
93
272
  @rna_seq = {}
94
273
  @gbk.features do |ft|
95
274
 
96
- next if ! ft.feature.to_s.include? "RNA"
275
+ next if ! ft.feature.to_s.include? "rRNA"
97
276
 
98
277
  ftH = ft.to_hash
99
278
  loc = ft.locations
@@ -129,20 +308,19 @@ class SequenceAnnotation
129
308
 
130
309
  end
131
310
 
132
-
133
311
  # Print CDS to files
134
312
  # RETURN : cds_file path
135
- def write_cds_to_file outdir
313
+ def write_cds_to_file
136
314
 
137
315
  cds_file = "#{@gbk.accession}.pep"
138
316
  dna_file = "#{@gbk.accession}.dna"
139
317
 
140
- if @coding_seq == nil
318
+ if @coding_seq.empty?
141
319
  get_cds
142
320
  end
143
321
 
144
- dna_out = File.open("#{outdir}/#{dna_file}", "w")
145
- File.open("#{outdir}/#{cds_file}", "w") do |fwrite|
322
+ dna_out = File.open("#{@outdir}/#{dna_file}", "w")
323
+ File.open("#{@outdir}/#{cds_file}", "w") do |fwrite|
146
324
  @coding_seq.each_key do |k|
147
325
  seqout = @coding_seq[k][:bioseq].output_fasta("#{k}",60)
148
326
  seqout_dna = @coding_seq[k][:bioseq_gene].output_fasta("#{k}",60)
@@ -152,28 +330,28 @@ class SequenceAnnotation
152
330
  end
153
331
  dna_out.close
154
332
 
155
- @cds_file = "#{outdir}/" + cds_file
333
+ @cds_file = "#{@outdir}/" + cds_file
156
334
 
157
335
  end
158
336
 
159
337
  # Print RNA to files
160
338
  # RETURN : rna_file path
161
- def write_rna_to_file outdir
339
+ def write_rna_to_file
162
340
 
163
341
  rna_file = "#{@gbk.accession}.rna"
164
342
 
165
- if @rna_seq == nil
343
+ if @rna_seq.empty?
166
344
  get_rna
167
345
  end
168
346
 
169
- File.open("#{outdir}/#{rna_file}", "w") do |fwrite|
347
+ File.open("#{@outdir}/#{rna_file}", "w") do |fwrite|
170
348
  @rna_seq.each_key do |k|
171
349
  seqout_dna = @rna_seq[k][:bioseq_gene].output_fasta("#{k}|#{@rna_seq[k][:type]}|#{@rna_seq[k][:product]}",60)
172
350
  fwrite.write(seqout_dna)
173
351
  end
174
352
  end
175
353
 
176
- @rna_file = "#{outdir}/" + rna_file
354
+ @rna_file = "#{@outdir}/" + rna_file
177
355
 
178
356
  end
179
357
 
@@ -247,6 +425,7 @@ class SequenceAnnotation
247
425
 
248
426
  # check if there is a reference genome.. reference_locus shouldn't be nil in that case
249
427
  if locus != nil
428
+
250
429
  qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (AA identity: #{pId}%; coverage(q,s): #{cov_query}%,#{cov_subject}%) from #{ref_genome}")
251
430
  ftArray.push(qNote)
252
431
 
@@ -390,9 +569,9 @@ class SequenceAnnotation
390
569
  end
391
570
 
392
571
 
393
- def save_genbank_to_file outdir
572
+ def save_genbank_to_file
394
573
 
395
- File.open("#{outdir}/#{@gbk.definition}.gbk", "w") do |f|
574
+ File.open("#{@outdir}/#{@gbk.definition}.gbk", "w") do |f|
396
575
  f.write(@gbk.to_biosequence.output(:genbank))
397
576
  end
398
577
 
@@ -403,7 +582,7 @@ class SequenceAnnotation
403
582
  ###################
404
583
 
405
584
  # Fct: Get dna sequence
406
- def get_DNA (cds, seq)
585
+ def get_DNA cds, seq
407
586
  loc = cds.locations
408
587
  sbeg = loc[0].from.to_i
409
588
  send = loc[0].to.to_i
@@ -418,11 +597,11 @@ class SequenceAnnotation
418
597
 
419
598
 
420
599
  # Fetch genbank genome from NCBI
421
- def fetch_ncbi_genome refgenome_id, outdir
600
+ def fetch_ncbi_genome refgenome_id
422
601
  Bio::NCBI.default_email = 'default@default.com'
423
602
  ncbi = Bio::NCBI::REST.new
424
603
  genbankstring = ncbi.efetch(refgenome_id, {"db"=>'nucleotide', "rettype"=>'gb'})
425
- File.open("#{outdir}/#{refgenome_id}.gbk", "w") do |f|
604
+ File.open("#{@outdir}/#{refgenome_id}.gbk", "w") do |f|
426
605
  f.write(genbankstring)
427
606
  end
428
607
  end
@@ -13,8 +13,10 @@ class SequenceFasta
13
13
  attr_reader :fasta_flat, :fasta_file, :annotation_files
14
14
 
15
15
  # Initialize fasta holder
16
- def initialize fasta_file, meta
16
+ def initialize root, outdir, fasta_file, meta
17
17
 
18
+ @root = root
19
+ @outdir = outdir
18
20
  @fasta_file = fasta_file
19
21
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
20
22
 
@@ -32,29 +34,29 @@ class SequenceFasta
32
34
 
33
35
 
34
36
  # Run prodigal on the genome to annotate
35
- def run_prodigal root, outdir
37
+ def run_prodigal
36
38
 
37
39
  @annotation_files = {}
38
- Dir.mkdir "#{outdir}" if ! Dir.exists? "#{outdir}"
40
+ Dir.mkdir "#{@outdir}" if ! Dir.exists? "#{@outdir}"
39
41
  if @meta==1
40
- system("#{root}/prodigal.linux -p meta -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
42
+ system("#{@root}/prodigal.linux -p meta -i #{@fasta_file} -a #{@outdir}/Proteins.fa -d #{@outdir}/Genes.fa -o #{@outdir}/Genbanks.gbk -q")
41
43
  else
42
- system("#{root}/prodigal.linux -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
44
+ system("#{@root}/prodigal.linux -i #{@fasta_file} -a #{@outdir}/Proteins.fa -d #{@outdir}/Genes.fa -o #{@outdir}/Genbanks.gbk -q")
43
45
  end
44
46
 
45
47
  @annotation_files = {
46
- multiGBK: "#{outdir}/Genbanks.gbk",
48
+ multiGBK: "#{@outdir}/Genbanks.gbk",
47
49
  contigs: [],
48
50
  contigs_length: [],
49
- genes: "#{outdir}/Genes.fa",
50
- proteins: "#{outdir}/Proteins.fa",
51
+ genes: "#{@outdir}/Genes.fa",
52
+ proteins: "#{@outdir}/Proteins.fa",
51
53
  prot_ids_by_contig: {},
52
- fasta_path: "#{outdir}/single-fasta/",
53
- gbk_path: "#{outdir}/single-genbank/"
54
+ fasta_path: "#{@outdir}/single-fasta/",
55
+ gbk_path: "#{@outdir}/single-genbank/"
54
56
  }
55
57
 
56
- split_fasta outdir
57
- split_genbank outdir, "#{outdir}/Genbanks.gbk"
58
+ split_fasta
59
+ split_genbank
58
60
  extract_cds_names
59
61
  @annotation_files
60
62
 
@@ -63,14 +65,14 @@ class SequenceFasta
63
65
 
64
66
  # Split Multi Fasta file
65
67
  # RETURN : array of fasta files
66
- def split_fasta outdir
68
+ def split_fasta
67
69
  @single_fasta = {}
68
- Dir.mkdir("#{outdir}/single-fasta") if ! Dir.exists?("#{outdir}/single-fasta")
70
+ Dir.mkdir("#{@outdir}/single-fasta") if ! Dir.exists?("#{@outdir}/single-fasta")
69
71
  @fasta_flat.each_entry do |seq|
70
72
  file_name = seq.definition.chomp.split(" ")[0]
71
73
  @annotation_files[:contigs] << "#{file_name}"
72
74
  @annotation_files[:contigs_length] << seq.seq.length
73
- File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
75
+ File.open("#{@outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
74
76
  fwrite.write(seq)
75
77
  end
76
78
  @single_fasta[file_name] = seq
@@ -80,9 +82,10 @@ class SequenceFasta
80
82
 
81
83
  # Split Multi Genbanks file
82
84
  # RETURN : array of genbank files
83
- def split_genbank outdir, multigbk
85
+ def split_genbank
84
86
 
85
- Dir.mkdir("#{outdir}/single-genbank")if ! Dir.exists?("#{outdir}/single-genbank")
87
+ multigbk = "#{@outdir}/Genbanks.gbk"
88
+ Dir.mkdir("#{@outdir}/single-genbank")if ! Dir.exists?("#{@outdir}/single-genbank")
86
89
  File.open(multigbk,"r") do |f|
87
90
  fopen = nil
88
91
  while l = f.gets
@@ -96,7 +99,7 @@ class SequenceFasta
96
99
  year = date.year
97
100
  locus = "LOCUS #{file_name}#{spacer}#{seq_length.to_s} bp DNA linear BCT #{day}-#{month}-#{year}\n"
98
101
  locus += "DEFINITION #{file_name}\n"
99
- fopen = File.open("#{outdir}/single-genbank/#{file_name}.gbk", "w")
102
+ fopen = File.open("#{@outdir}/single-genbank/#{file_name}.gbk", "w")
100
103
  fopen.write(locus)
101
104
  elsif l[0..1] == "//"
102
105
  fopen.write(outseq)