full_lengther_next 0.6.2 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/{README.rdoc → README.md} +0 -0
  9. data/Rakefile +6 -37
  10. data/bin/console +14 -0
  11. data/bin/download_fln_dbs.rb +2 -7
  12. data/bin/full_lengther_next +85 -6
  13. data/bin/make_user_db.rb +13 -5
  14. data/bin/setup +8 -0
  15. data/full_lengther_next.gemspec +42 -0
  16. data/lib/full_lengther_next.rb +2 -10
  17. data/lib/full_lengther_next/artifacts.rb +74 -0
  18. data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
  19. data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
  20. data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
  21. data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
  22. data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
  23. data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
  24. data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
  25. data/lib/full_lengther_next/fln_stats.rb +613 -0
  26. data/lib/full_lengther_next/go_methods.rb +42 -0
  27. data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
  28. data/lib/full_lengther_next/mapping.rb +296 -0
  29. data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
  30. data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
  31. data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
  32. data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
  33. data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
  34. data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
  35. data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
  36. data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
  37. data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
  38. data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
  39. data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
  40. data/lib/full_lengther_next/version.rb +3 -0
  41. data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
  42. data/report_templates/general_summary.erb +140 -0
  43. data/report_templates/mapping_summary.erb +98 -0
  44. data/report_templates/reptrans_summary.erb +32 -0
  45. metadata +112 -134
  46. data/.gemtest +0 -0
  47. data/History.txt +0 -32
  48. data/Manifest.txt +0 -44
  49. data/PostInstall.txt +0 -6
  50. data/bin/plot_fln.rb +0 -270
  51. data/bin/plot_taxonomy.rb +0 -70
  52. data/lib/expresscanvas.zip +0 -0
  53. data/lib/full_lengther_next/classes/artifacts.rb +0 -66
  54. data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
  55. data/script/console +0 -10
  56. data/script/destroy +0 -14
  57. data/script/generate +0 -14
  58. data/test/test_full_lengther_next.rb +0 -11
  59. data/test/test_helper.rb +0 -3
@@ -0,0 +1,42 @@
1
# Builds one plot-ready table per Gene Ontology branch.
#
# sequences_by_ontologies - Hash whose keys are GO term names prefixed with the
#                           ontology code ('F:', 'C:' or 'P:') and whose values
#                           are Arrays of sequence ids annotated with the term.
# fpkm                    - optional Hash mapping a sequence id to an Array
#                           whose first element is the value to accumulate.
#                           When empty, terms are weighted by their number of
#                           sequences instead.
#
# Returns a Hash keyed by :function_go, :component_go and :process_go. Each
# value is an Array of rows: a header row [key.to_s, 'GO'] followed by
# [term_label, value] rows sorted by descending value, or a single
# ['No_data', 1] row when the ontology has no usable term.
def go_for_graph(sequences_by_ontologies, fpkm = {})
  ontologies = [
    [:function_go, 'F:'],
    [:component_go, 'C:'],
    [:process_go, 'P:']
  ]

  container = {}
  ontologies.each do |key, prefix|
    go_table = []
    sequences_by_ontologies.each do |go_name, seq_names|
      next unless go_name.to_s.start_with?(prefix)
      # Strip only the leading ontology code; the label itself may contain it.
      go_label = go_name.sub(prefix, '')
      if fpkm.empty?
        go_table << [go_label, seq_names.length]
      else
        # Sequences absent from the table contribute nothing instead of
        # raising, and an empty id list adds up to 0 rather than nil.
        total = seq_names.inject(0) do |acc, seq_name|
          values = fpkm[seq_name]
          value = values.nil? ? nil : values.first
          value.nil? ? acc : acc + value
        end
        go_table << [go_label, total] if total > 0
      end
    end

    go_table.sort! { |row_a, row_b| row_b[1] <=> row_a[1] }
    go_table << ['No_data', 1] if go_table.empty?
    go_table.unshift([key.to_s, 'GO'])
    container[key] = go_table
  end
  container
end
@@ -0,0 +1,296 @@
1
+ require 'scbi_zcat'
2
+
3
# Forward-only reader that hands out per-position coverage one contig at a
# time from a tab-separated file opened through ScbiZcatFile.
#
# Each line is split on tabs and read as: column 0 = contig name,
# column 1 = 1-based position, column 2 = coverage at that position.
class Mpileup
  # file_path - path handed to ScbiZcatFile.new.
  def initialize(file_path)
    @mpileup_file = ScbiZcatFile.new(file_path)
    # First line of the next contig, read ahead but not yet consumed.
    @last_line = nil
  end

  # Reads every consecutive line belonging to +contig_name+.
  #
  # contig_name   - name expected in column 0 of the pending line.
  # contig_length - size of the coverage Array to allocate.
  #
  # Returns an Array of coverages (0 where the file has no line for a
  # position), or nil when the pending line belongs to another contig or
  # nothing could be read.
  def read_contig(contig_name, contig_length)
    if @last_line.nil?
      raw = @mpileup_file.readline
      return nil if raw.nil?
      @last_line = raw.chomp.split("\t")
    end
    # The pending line stays buffered until its own contig is requested.
    return nil unless @last_line[0] == contig_name

    depths = initialize_contig(contig_length)
    until @mpileup_file.eof
      columns = @mpileup_file.readline.chomp.split("\t")
      unless columns[0] == contig_name
        # First line of the following contig: keep it for the next call.
        @last_line = columns
        break
      end
      depths[columns[1].to_i - 1] = columns[2].to_i
    end
    return depths
  end

  # Allocates a zero-filled coverage Array of +contig_length+ entries and
  # stores the coverage carried by the buffered line.
  def initialize_contig(contig_length)
    depths = Array.new(contig_length, 0)
    depths[@last_line[1].to_i - 1] = @last_line[2].to_i
    return depths
  end

  # Closes the underlying file.
  def close
    @mpileup_file.close
  end
end
55
+
56
# Runs an external read-mapping pipeline (bowtie2 and samtools command lines by
# default) against a reference FASTA and derives per-contig coverage figures.
#
# Intermediate files are written to the temp folder and reused on later runs
# unless a step is forced.
class Mapping
  # user_options - Hash with:
  #   :ref_fasta_path - path to the reference FASTA.
  #   :temp_folder    - folder for indexes, alignments, logs and statistics.
  #   :threads        - value substituted for /THREADS/ in commands (default 1).
  #   :total_reads    - Array with the number of input reads per mapping
  #                     process; filled from the mapping logs when missing.
  def initialize(user_options = {})
    options = {
      threads: 1,
      total_reads: []
    }.merge!(user_options)
    @ref_fasta_path = options[:ref_fasta_path]
    @temp_folder = options[:temp_folder]
    @threads = options[:threads]

    @map_files = []
    @paired = []          # one boolean per mapping process
    @idxstats = []        # one Hash per mapping process
    @mpileups = []
    @coverage_results = {}
    @total_reads = options[:total_reads]
  end

  # Builds the mapper index unless '<ref>*.bt2' files already exist.
  #
  # user_options - Hash with :name (index base name inside the temp folder),
  #                :command (template using /THREADS/, /REF_FASTA/ and
  #                /OUTPUT/), :log (log file name) and :force.
  def do_ref(user_options = {})
    options = {
      name: 'ref',
      command: 'bowtie2-build -f --threads /THREADS/ /REF_FASTA/ /OUTPUT/',
      log: 'reference_log',
      force: false
    }
    options.merge!(user_options)
    @ref = File.join(@temp_folder, options[:name])
    cmd = options[:command].gsub('/THREADS/', @threads.to_s)
    cmd.gsub!('/REF_FASTA/', @ref_fasta_path)
    cmd.gsub!('/OUTPUT/', @ref)
    # POSIX redirection: '&>' is a bashism that a plain sh parses as
    # "run in background, then truncate the file".
    cmd = cmd + " > #{File.join(@temp_folder, options[:log])} 2>&1"
    system(cmd) if options[:force] || Dir.glob(@ref+'*.bt2').length == 0
  end

  # Creates the samtools FASTA index ('.fai') when it does not exist yet.
  def do_samtools_ref
    cmd = "samtools faidx #{@ref_fasta_path}"
    system(cmd) if !File.exist?(@ref_fasta_path + '.fai')
  end

  # Maps every read set in options[:files] against the reference.
  #
  # user_options - Hash overriding the defaults below. :files is an Array in
  #                which each entry holds 1 (single) or 2 (paired) read files.
  #                When either pipe option is nil the mapper writes a SAM file
  #                through :flag_output; otherwise its output is piped and the
  #                /OUTPUT/ placeholder receives a BAM path.
  #
  # Raises RuntimeError when an entry has a wrong number of files or when the
  # number of input reads of a process is unknown or 0.
  def do_map(user_options = {})
    options = {
      files: [],
      command: 'bowtie2 -p /THREADS/ -x /REFERENCE/',
      paired_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
      single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
      flag_single: '-U',
      flags_paired: ['-1', '-2'],
      additional_paired_flags: '',
      flag_output: '-S',
      output: File.join(@temp_folder, 'map_data'),
      log: File.join(@temp_folder, 'mapping_log'),
      force: false
    }
    options.merge!(user_options)
    options[:files].each_with_index do |read_files, map_process_id|
      log_file = "#{options[:log]}_#{map_process_id}"
      cmd = options[:command].gsub('/THREADS/', @threads.to_s)
      cmd.gsub!('/REFERENCE/', @ref)
      if read_files.length == 1
        cmd = cmd + " #{options[:flag_single]} #{read_files.first}"
        @paired << false
      elsif read_files.length == 2
        @paired << true
        cmd = cmd + " #{options[:additional_paired_flags]} #{options[:flags_paired].first} #{read_files.first} #{options[:flags_paired].last} #{read_files.last}"
      else
        raise('Incorrect number of read files. Must be 1 (single) or 2 (paired).')
      end
      map_file = nil
      if options[:paired_pipe].nil? || options[:single_pipe].nil?
        map_file = options[:output] + "_#{map_process_id}" + '.sam'
        cmd = cmd + " #{options[:flag_output]} #{map_file} > #{log_file} 2>&1"
      else
        pipe = @paired[map_process_id] ? options[:paired_pipe] : options[:single_pipe]
        map_file = options[:output] + "_#{map_process_id}" + '.bam'
        cmd = cmd + " 2> #{log_file} " + pipe.gsub('/OUTPUT/', map_file)
      end
      @map_files << map_file
      system(cmd) if options[:force] || !File.exist?(map_file)
      if @total_reads[map_process_id].nil? && File.exist?(log_file)
        # A log without the summary line leaves the entry unset so the
        # descriptive error below is raised instead of a NoMethodError.
        reads = read_total_reads(log_file)
        @total_reads << reads if !reads.nil?
      end
      raise('ERROR: The mapping process has failed, please check the map folder into the temp folder') if @total_reads[map_process_id].nil? || @total_reads[map_process_id] == 0
    end
  end

  # Indexes every BAM alignment lacking a '.bai' file, or every alignment
  # when user_options[:force] is set.
  def index(user_options = {})
    @map_files.each do |map_file|
      system("samtools index #{map_file}") if (map_file.include?('.bam') && !File.exist?(map_file+'.bai')) || user_options[:force]
    end
  end

  # Returns an Array with, per alignment file, the lines printed by
  # 'samtools flagstat'.
  def report
    reports = []
    @map_files.each do |map_file|
      cmd = "samtools flagstat #{map_file}"
      report = %x[#{cmd}].split("\n")
      reports << report
    end
    return reports
  end

  # Runs 'samtools idxstats' for every alignment (reusing an existing gzipped
  # output) and loads the result into @idxstats.
  def idxstats
    @map_files.each_with_index do |map_file, map_process_id|
      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
      file_path = File.join(@temp_folder, "#{prefix}_idxstats_#{map_process_id}.gz")
      cmd = "samtools idxstats #{map_file} | gzip - -f > #{file_path}"
      system(cmd) if !File.exist?(file_path)
      parse_idxstats(file_path)
    end
  end

  # Runs 'samtools mpileup' for every alignment and computes the coverage
  # parameters of every contig of the reference.
  #
  # user_options - Hash of extra command-line flags (flag => value). The
  #                special key :parse_options may hold a Hash overriding
  #                :add_coverages, :normalize_coverages and :cols (1-based
  #                columns kept with 'cut'). The caller's Hash is not modified.
  #
  # Returns a Hash: contig name => [mean_normalized_differences, mean_max,
  # mean_coverage, proportion_sequence_mapped, fpkm, mapped_reads] plus the
  # coverage Array when :add_coverages is set.
  def mpileup(user_options = {})
    parse_options = {
      add_coverages: false,
      normalize_coverages: false,
      cols: [1,2,4] # 1 based for cut
    }
    flags = user_options.dup
    custom_parse_options = flags.delete(:parse_options)
    parse_options.merge!(custom_parse_options) if !custom_parse_options.nil?
    do_samtools_ref
    opts = flags.map { |flag, value| "#{flag} #{value}" }

    contig_list_file = File.join(@temp_folder, File.basename(@ref_fasta_path)+'.lst')
    system("grep '>' #{@ref_fasta_path} | sed 's/>//g' > #{contig_list_file}") if !File.exist?(contig_list_file)
    idxstats if @idxstats.empty?
    cut = nil
    cut = " |cut -f #{parse_options[:cols].join(',')}" if !parse_options[:cols].nil? && !parse_options[:cols].empty?
    mpileup_files = []
    @map_files.each_with_index do |map_file, map_process_id|
      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
      file_path = File.join(@temp_folder, "#{prefix}_mpileup_#{map_process_id}.gz")
      mpileup_files << file_path
      cmd = "samtools mpileup -f #{@ref_fasta_path} #{opts.join(' ')} #{map_file}#{cut} | gzip - -f > #{file_path}"
      system(cmd) if !File.exist?(file_path)
    end
    coverage_results = {}

    parse_mpileup(mpileup_files, contig_list_file) do |contig_name, contig_length, coverages|
      mapped_reads = @idxstats.map{|info| info[contig_name][:mapped]}.inject { |sum, n| sum + n }
      get_coverage_parameters(contig_name, contig_length, mapped_reads, coverages, parse_options, coverage_results)
    end
    return coverage_results
  end

  # Walks the contig list and yields, for each contig, its name, its length
  # (taken from the first idxstats table) and an Array with the coverage
  # Array found in each pileup file that contains the contig.
  def parse_mpileup(file_paths, contig_list_file)
    mpileup_files = file_paths.map{|file_path| Mpileup.new(file_path)}
    # File.foreach closes the list file once the iteration finishes.
    File.foreach(contig_list_file) do |contig_name|
      contig_name.chomp!
      contig_length = @idxstats.first[contig_name][:length]
      all_coverages = []
      mpileup_files.each do |mpileup_file|
        coverages = mpileup_file.read_contig(contig_name, contig_length)
        all_coverages << coverages if !coverages.nil? && !coverages.empty?
      end
      yield(contig_name, contig_length, all_coverages)
    end
    mpileup_files.map{|mf| mf.close}
  end

  # Loads a gzipped 'samtools idxstats' table and appends it to @idxstats as
  # contig name => {length:, mapped:, unmmapped:}. The '*' row is dropped.
  def parse_idxstats(file_path)
    stats = {}
    stats_file = ScbiZcatFile.new(file_path)
    while !stats_file.eof
      fields = stats_file.readline.chomp.split("\t")
      # NOTE(review): the :unmmapped key is misspelled; kept because code
      # outside this class may read it.
      stats[fields[0]] = {length: fields[1].to_i, mapped: fields[2].to_i, unmmapped: fields[3].to_i}
    end
    stats_file.close
    stats.delete('*')
    @idxstats << stats
  end

  private

  # Extracts the number of input reads from a mapper log.
  #
  # Returns the Integer that starts the first line matching
  # 'N reads; of these:', or nil when no such line exists.
  def read_total_reads(log_file)
    summary = File.readlines(log_file).find { |line| /\d+ reads; of these:/ =~ line }
    return nil if summary.nil?
    summary.split(' ').first.to_i
  end

  # Stores in coverage_results[seq_name] the coverage record of one contig.
  # All figures stay at 0 when the contig has no mapped reads or no pileup
  # data. Coverages of several pileup files are added position by position.
  def get_coverage_parameters(seq_name, contig_length, mapped_reads, mpileup_info, options, coverage_results)
    mean_normalized_differences = 0
    mean_max = 0
    mean_coverage = 0
    proportion_sequence_mapped = 0
    fpkm = 0
    if mapped_reads > 0
      if !mpileup_info.empty?
        if mpileup_info.length == 1
          coverages = mpileup_info.first
        else
          coverages = mpileup_info.transpose.map {|x| x.reduce(:+)}
        end
        mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm = calculate_coverage_parameters(coverages, contig_length, mapped_reads, options)
      end
    end
    record = [mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm, mapped_reads]

    # coverages is nil here when the contig had nothing mapped.
    record << coverages if options[:add_coverages]
    coverage_results[seq_name] = record
  end

  # Computes the coverage figures of one contig.
  #
  # coverages    - Array with the coverage of every position; normalised in
  #                place to its maximum when options[:normalize_coverages].
  # ref_length   - contig length.
  # mapped_reads - reads mapped on the contig.
  #
  # Returns [mean_normalized_differences, mean_max, mean_coverage,
  # proportion_sequence_mapped, fpkm]; all 0 when no position is covered.
  def calculate_coverage_parameters(coverages, ref_length, mapped_reads, options)
    # @paired is an Array (always truthy), so it must be inspected: a
    # fragment counts two mates only when a paired library was mapped.
    n_mates = @paired.any? ? 2.0 : 1.0
    millions = @total_reads.inject { |sum, n| sum + n }/1.0e6
    mean_normalized_differences = 0
    mean_max = 0
    mean_coverage = 0
    proportion_sequence_mapped = 0
    fpkm = 0

    greater0 = coverages.select{|c| c > 0}
    coverages_greater0 = greater0.length
    if coverages_greater0 > 0
      fpkm = mapped_reads/n_mates/(ref_length/1000.0)/millions
      mean_coverage = coverages.inject { |sum, n| sum + n }.fdiv(ref_length)
      # Mean of the 10% most covered positions.
      n_max = (coverages.length/10.0).ceil
      maximums = coverages.sort{|c1, c2| c2 <=> c1}[0..n_max-1]
      mean_max = maximums.inject { |sum, n| sum + n }.fdiv(n_max)

      # Mean relative deviation from the mean, over covered positions only.
      mean_coverage_filtered = greater0.inject { |sum, n| sum + n }.fdiv(coverages_greater0)
      normalized_differences = greater0.map{|c| (c - mean_coverage_filtered).abs/mean_coverage_filtered}
      mean_normalized_differences = normalized_differences.inject { |sum, n| sum + n } / normalized_differences.length
      proportion_sequence_mapped = greater0.length.fdiv(ref_length)

      if options[:normalize_coverages]
        max = coverages.max
        coverages.map!{|cov| cov.fdiv(max) }
      end
    end
    return mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm
  end
end
@@ -10,6 +10,7 @@ require 'artifacts'
10
10
  require 'blast_functions'
11
11
  require 'exonerate_result'
12
12
  require 'scbi_fasta'
13
+ require 'mapping'
13
14
 
14
15
  require 'fl_analysis'
15
16
  include FlAnalysis
@@ -28,17 +29,67 @@ class MyWorker < ScbiMapreduce::Worker
28
29
  $verbose = manager_options[:verbose]
29
30
  end
30
31
 
31
- def process_object(obj_sequence)
32
- # Punto de arranque de FLN
33
- $WORKER_LOG.info "Processing chunk: #{obj_sequence.first.seq_name}"
34
- full_lenghter2(obj_sequence)
35
- return obj_sequence
32
# Runs the pipeline stage named by the key of the first element of the chunk.
#
# initial_obj - Array of single-key Hashes, all expected to share the same key:
#               :fln     => a sequence object (the whole chunk is analysed);
#               :mapping => the value of the first Hash is handed to
#                           map_transcriptome.
#
# Returns a Hash {task => result}: the analysed sequences for :fln, the value
# returned by map_transcriptome for :mapping, and nil for any other task.
def process_object(initial_obj)
  task = initial_obj.first.keys.first
  result =
    case task
    when :fln
      seqs = initial_obj.map { |entry| entry[:fln] }
      # FLN entry point
      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{seqs.first.seq_name}"
      full_lenghter2(seqs)
      seqs
    when :mapping
      reference = initial_obj.first.values.first
      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{reference}"
      map_transcriptome(reference)
    end
  { task => result }
end
37
47
 
38
48
  def closing_worker
39
49
 
40
50
  end
41
51
 
52
+ #####################################################################################
53
+ # MAPPING METHODS
54
+ #####################################################################################
55
# Maps the configured read files against one reference FASTA chunk and
# returns its coverage analysis.
#
# initial_obj - file name of the reference FASTA, looked up inside
#               @options[:temp_map_folder]; its name without '.fasta'
#               prefixes every intermediate file.
#
# Returns whatever Mapping#mpileup returns for this reference.
def map_transcriptome(initial_obj)
  map_folder = @options[:temp_map_folder]
  chunk_id = File.basename(initial_obj, '.fasta')

  mapper = Mapping.new(
    ref_fasta_path: File.join(map_folder, initial_obj),
    threads: 1,
    temp_folder: map_folder
  )

  $WORKER_LOG.info "Do bowtie ref"
  mapper.do_ref(name: "#{chunk_id}_ref", log: "#{chunk_id}_reference_log")

  $WORKER_LOG.info "Do bowtie mapping"
  map_settings = {
    files: @options[:files2map],
    command: 'bowtie2 -p /THREADS/ -x /REFERENCE/ -a',
    paired_pipe: '| samtools view -bS -f 2 | samtools sort -o /OUTPUT/',
    single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
    additional_paired_flags: '--no-mixed',
    output: File.join(map_folder, "#{chunk_id}_map_data"),
    log: File.join(map_folder, "#{chunk_id}_mapping_log")
  }
  mapper.do_map(map_settings)

  $WORKER_LOG.info "Do samtools ref"
  mapper.index
  $WORKER_LOG.info "Do idxstats"
  mapper.idxstats
  $WORKER_LOG.info "Do mpileup and coverage analysis"
  coverage = mapper.mpileup('--ff' => 'UNMAP,QCFAIL')
  $WORKER_LOG.info "Finished coverage analysis"
  return coverage
end
92
+
42
93
  #####################################################################################
43
94
  # FLN FUNCTIONS
44
95
  #####################################################################################
@@ -49,15 +100,18 @@ class MyWorker < ScbiMapreduce::Worker
49
100
 
50
101
  def full_lenghter2(seqs)
51
102
  #seqs.map{|seq| seq.change_degenerated_nt!} # Clean degenerated nt
52
-
103
+ check_seqs = seqs
104
+ if !@options[:files2map].empty? && @options[:remove_unmapped]
105
+ check_seqs = check_mapping(seqs)
106
+ end
107
+
53
108
  # User database
54
109
  #--------------------------------------------
55
110
  # if the user has included his own database in the parameters entry,
56
111
  # the location of the database is tested, and blast and the results analysis is done
57
- check_seqs = seqs
58
112
  if @options[:user_db]
59
113
  user_db = File.basename(@options[:user_db])
60
- check_seqs = check_prot_db(seqs, @options[:user_db], 'blastx', 1, user_db, @options[:blast])
114
+ check_seqs = check_prot_db(check_seqs, @options[:user_db], 'blastx', 1, user_db, @options[:blast])
61
115
  end
62
116
 
63
117
  # UniProt (sp)
@@ -95,6 +149,14 @@ class MyWorker < ScbiMapreduce::Worker
95
149
  #----------------------------------------------------------------------------------
96
150
  # END MAIN
97
151
  #----------------------------------------------------------------------------------
152
# Runs the 'mapping' artifact check on every sequence of the chunk.
#
# seqs - Array of sequence objects. It is extended in place with any sequence
#        that artifact? adds to the list it is given.
#        NOTE(review): artifact? is defined elsewhere; presumably it flags
#        sequences through #ignore and may emit new ones - confirm there.
#
# Returns a new Array with the sequences (original and added) whose #ignore
# is not set.
def check_mapping(seqs)
  derived = []
  seqs.each { |seq| artifact?(seq, nil, 'mapping', '', @options, derived) }
  seqs.concat(derived)
  return seqs.reject { |seq| seq.ignore }
end
+ end
98
160
 
99
161
  def check_prot_db(seqs, db_path, blast_type, evalue, db_name, additional_blast_options)
100
162
 
@@ -136,7 +198,7 @@ class MyWorker < ScbiMapreduce::Worker
136
198
  $WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
137
199
  blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
138
200
  chunk_name = input.first.seq_name.gsub(/\W+/,'_')
139
- file_path = File.join('temp', File.basename(database)+'_'+chunk_name)
201
+ file_path = File.join(@options[:temp], File.basename(database)+'_'+chunk_name)
140
202
  if @options[:hdd] #Write/parse blast on Disk
141
203
  file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
142
204
  if !File.exists?(file_name)