full_lengther_next 0.6.2 → 0.9.8
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +6 -37
- data/bin/console +14 -0
- data/bin/download_fln_dbs.rb +2 -7
- data/bin/full_lengther_next +85 -6
- data/bin/make_user_db.rb +13 -5
- data/bin/setup +8 -0
- data/full_lengther_next.gemspec +42 -0
- data/lib/full_lengther_next.rb +2 -10
- data/lib/full_lengther_next/artifacts.rb +74 -0
- data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
- data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
- data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
- data/lib/full_lengther_next/fln_stats.rb +613 -0
- data/lib/full_lengther_next/go_methods.rb +42 -0
- data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
- data/lib/full_lengther_next/mapping.rb +296 -0
- data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
- data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
- data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
- data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
- data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
- data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
- data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
- data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
- data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
- data/lib/full_lengther_next/version.rb +3 -0
- data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
- data/report_templates/general_summary.erb +140 -0
- data/report_templates/mapping_summary.erb +98 -0
- data/report_templates/reptrans_summary.erb +32 -0
- metadata +112 -134
- data/.gemtest +0 -0
- data/History.txt +0 -32
- data/Manifest.txt +0 -44
- data/PostInstall.txt +0 -6
- data/bin/plot_fln.rb +0 -270
- data/bin/plot_taxonomy.rb +0 -70
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next/classes/artifacts.rb +0 -66
- data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_full_lengther_next.rb +0 -11
- data/test/test_helper.rb +0 -3
data/lib/full_lengther_next/go_methods.rb:

@@ -0,0 +1,42 @@
+def go_for_graph(sequences_by_ontologies, fpkm = {})
+  container = {}
+  go_data = [
+    [:function_go, 'F:'],
+    [:component_go, 'C:'],
+    [:process_go, 'P:']
+  ]
+
+  go_data.each do |key, prefix|
+    go_ontology = sequences_by_ontologies.select{|go, seq_ids| go =~ /^#{prefix}/}
+    go_names = []
+    go_vals = []
+    go_ontology.each do |go_name, seq_names|
+      go_label = go_name.gsub(prefix, '')
+      if fpkm.empty?
+        go_vals << seq_names.length
+        go_names << go_label
+      else
+        sum = seq_names.map{|seq_name| fpkm[seq_name].first }.inject { |sum, n| sum + n }
+        if sum > 0
+          go_vals << sum
+          go_names << go_label
+        end
+      end
+    end
+    go_table = []
+    go_names.each_with_index do |name, index|
+      go_table << [name, go_vals[index]]
+    end
+    go_table.sort!{|v1, v2| v2[1] <=> v1[1]}
+    go_table.unshift([key.to_s, 'GO'])
+    if !go_names.empty?
+      container[key] = go_table
+    else
+      container[key] = [
+        [key.to_s, 'GO'],
+        ['No_data', 1]
+      ]
+    end
+  end
+  return container
+end
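The new go_for_graph helper groups GO annotations by ontology prefix ('F:', 'C:', 'P:') and builds one header-plus-rows table per ontology, either counting sequences per term or summing their FPKM values. A minimal usage sketch follows; the GO terms, sequence names and FPKM values are made up for illustration and are not part of the gem:

  # Hypothetical inputs: GO term => sequence ids, and sequence id => [fpkm]
  sequences_by_ontologies = {
    'F:ATP binding' => ['seq1', 'seq2'],
    'P:translation' => ['seq2'],
    'C:ribosome'    => ['seq1', 'seq3']
  }
  fpkm = { 'seq1' => [12.5], 'seq2' => [3.0], 'seq3' => [0.0] }

  tables = go_for_graph(sequences_by_ontologies, fpkm)
  # tables[:function_go] #=> [["function_go", "GO"], ["ATP binding", 15.5]]
  # Each table starts with a header row and is sorted by value, ready for plotting.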
data/lib/full_lengther_next/handle_db.rb: File without changes
data/lib/full_lengther_next/mapping.rb:

@@ -0,0 +1,296 @@
+require 'scbi_zcat'
+
+class Mpileup
+  def initialize(file_path)
+    @mpileup_file = ScbiZcatFile.new(file_path)
+    @last_line = nil
+  end
+
+  def read_contig(contig_name, contig_length)
+    coverages = []
+    if !@last_line.nil?
+      if @last_line[0] != contig_name
+        return nil
+      else
+        coverages = initialize_contig(contig_length)
+      end
+    else
+      line = @mpileup_file.readline
+      if line.nil?
+        @last_line = nil
+        return nil
+      else
+        @last_line = line.chomp.split("\t")
+        if @last_line[0] != contig_name
+          return nil
+        else
+          coverages = initialize_contig(contig_length)
+        end
+      end
+    end
+
+    while !@mpileup_file.eof
+      fields = @mpileup_file.readline.chomp.split("\t")
+      contig = fields[0]
+      if contig == contig_name
+        coverages[fields[1].to_i-1] = fields[2].to_i
+      else
+        @last_line = fields
+        break
+      end
+    end
+    return coverages
+  end
+
+  def initialize_contig(contig_length)
+    coverages = Array.new(contig_length, 0)
+    coverages[@last_line[1].to_i-1] = @last_line[2].to_i
+    return coverages
+  end
+
+  def close
+    @mpileup_file.close
+  end
+end
+
+class Mapping
+  def initialize(user_options = {})
+
+    options = {
+      threads: 1,
+      total_reads: [],
+    }.merge!(user_options)
+    @ref_fasta_path = options[:ref_fasta_path]
+    @temp_folder = options[:temp_folder]
+    @threads = options[:threads]
+
+    @map_files = []
+    @paired = []
+    @idxstats = []
+    @mpileups = []
+    @coverage_results = {}
+    @total_reads = options[:total_reads]
+  end
+
+  def do_ref(user_options = {})
+    options = {
+      name: 'ref',
+      command: 'bowtie2-build -f --threads /THREADS/ /REF_FASTA/ /OUTPUT/',
+      log: 'reference_log',
+      force: false
+    }
+    options.merge!(user_options)
+    @ref = File.join(@temp_folder, options[:name])
+    cmd = options[:command].gsub('/THREADS/', @threads.to_s)
+    cmd.gsub!('/REF_FASTA/', @ref_fasta_path)
+    cmd.gsub!('/OUTPUT/', @ref)
+    cmd = cmd + " &> #{File.join(@temp_folder, options[:log])}"
+    system(cmd) if options[:force] || Dir.glob(@ref+'*.bt2').length == 0
+  end
+
+  def do_samtools_ref
+    cmd = "samtools faidx #{@ref_fasta_path}"
+    system(cmd) if !File.exists?(@ref_fasta_path + '.fai')
+  end
+
+  def do_map(user_options = {})
+    options = {
+      files: [],
+      command: 'bowtie2 -p /THREADS/ -x /REFERENCE/',
+      paired_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      flag_single: '-U',
+      flags_paired: ['-1', '-2'],
+      additional_paired_flags: '',
+      flag_output: '-S',
+      output: File.join(@temp_folder, 'map_data'),
+      log: File.join(@temp_folder, 'mapping_log'),
+      force: false
+    }
+    options.merge!(user_options)
+    options[:files].each_with_index do |read_files, map_process_id|
+      cmd = options[:command].gsub('/THREADS/', @threads.to_s)
+      cmd.gsub!('/REFERENCE/', @ref)
+      if read_files.length == 1
+        cmd = cmd + " #{options[:flag_single]} #{read_files.first}"
+        @paired << false
+      elsif read_files.length == 2
+        @paired << true
+        cmd = cmd + " #{options[:additional_paired_flags]} #{options[:flags_paired].first} #{read_files.first} #{options[:flags_paired].last} #{read_files.last}"
+      else
+        raise('Incorrect number of read files. Must be 1 (single) or 2 (paired).')
+      end
+      map_file = nil
+      if options[:paired_pipe].nil? || options[:single_pipe].nil?
+        map_file = options[:output] + "_#{map_process_id}" + '.sam'
+        cmd = cmd + " #{options[:flag_output]} #{map_file} &> #{options[:log]}_#{map_process_id}"
+      else
+        if @paired[map_process_id]
+          pipe = options[:paired_pipe]
+        else
+          pipe = options[:single_pipe]
+        end
+        map_file = options[:output] + "_#{map_process_id}" + '.bam'
+        cmd = cmd + " 2> #{options[:log]}_#{map_process_id} " + pipe.gsub('/OUTPUT/', map_file)
+      end
+      @map_files << map_file
+      system(cmd) if options[:force] || !File.exists?(map_file)
+      @total_reads << File.open("#{options[:log]}_#{map_process_id}").readlines.select{|line| /\d+ reads; of these:/ =~ line}.first.split(' ').first.to_i if File.exists?("#{options[:log]}_#{map_process_id}") && @total_reads[map_process_id].nil?
+      raise('ERROR: The mapping process has failed, please check the map folder into the temp folder') if @total_reads[map_process_id].nil? || @total_reads[map_process_id] == 0
+    end
+  end
+
+  def index(user_options = {})
+    @map_files.each do |map_file|
+      system("samtools index #{map_file}") if (map_file.include?('.bam') && !File.exists?(map_file+'.bai')) || user_options[:force]
+    end
+  end
+
+  def report
+    reports = []
+    @map_files.each do |map_file|
+      cmd = "samtools flagstat #{map_file}"
+      report = %x[#{cmd}].split("\n")
+      reports << report
+    end
+    return reports
+  end
+
+  def idxstats
+    @map_files.each_with_index do |map_file, map_process_id|
+      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
+      file_path = File.join(@temp_folder, "#{prefix}_idxstats_#{map_process_id}.gz")
+      cmd = "samtools idxstats #{map_file} | gzip - -f > #{file_path}"
+      system(cmd) if !File.exists?(file_path)
+      parse_idxstats(file_path)
+    end
+  end
+
+  def mpileup(user_options = {})
+    parse_options = {
+      add_coverages: false,
+      normalize_coverages: false,
+      cols: [1,2,4] # 1 based for cut
+    }
+    parse_options.merge!(user_options.delete(:parse_options)) if !user_options[:parse_options].nil?
+    opts = []
+    do_samtools_ref
+    user_options.each do |flag, value|
+      opts << [flag, value.to_s]
+    end
+
+    contig_list_file = File.join(@temp_folder, File.basename(@ref_fasta_path)+'.lst')
+    system("grep '>' #{@ref_fasta_path} | sed 's/>//g' > #{contig_list_file}") if !File.exists?(contig_list_file)
+    idxstats if @idxstats.empty?
+    cut = nil
+    cut = " |cut -f #{parse_options[:cols].join(',')}" if !parse_options[:cols].nil? && !parse_options[:cols].empty?
+    mpileup_files = []
+    @map_files.each_with_index do |map_file, map_process_id|
+      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
+      file_path = File.join(@temp_folder, "#{prefix}_mpileup_#{map_process_id}.gz")
+      mpileup_files << file_path
+      cmd = "samtools mpileup -f #{@ref_fasta_path} #{opts.join(' ')} #{map_file}#{cut} | gzip - -f > #{file_path}"
+      system(cmd) if !File.exists?(file_path)
+    end
+    coverage_results = {}
+
+    parse_mpileup(mpileup_files, contig_list_file) do |contig_name, contig_length, coverages|
+      mapped_reads = @idxstats.map{|info| info[contig_name][:mapped]}.inject { |sum, n| sum + n }
+      get_coverage_parameters(contig_name, contig_length, mapped_reads, coverages, parse_options, coverage_results)
+    end
+    return coverage_results
+  end
+
+  def parse_mpileup(file_paths, contig_list_file)
+    last_contig = nil
+    mpileup_files = file_paths.map{|file_path| Mpileup.new(file_path)}
+    File.open(contig_list_file).each do |contig_name|
+      contig_name.chomp!
+      contig_length = @idxstats.first[contig_name][:length]
+      all_coverages = []
+      mpileup_files.each do |mpileup_file|
+        coverages = mpileup_file.read_contig(contig_name, contig_length)
+        all_coverages << coverages if !coverages.nil? && !coverages.empty?
+      end
+      yield(contig_name, contig_length, all_coverages)
+    end
+    mpileup_files.map{|mf| mf.close}
+  end
+
+
+  def parse_idxstats(file_path)
+    stats = {}
+    stats_file = ScbiZcatFile.new(file_path)
+    while !stats_file.eof
+      fields = stats_file.readline.chomp.split("\t")
+      stats[fields[0]] = {length: fields[1].to_i, mapped: fields[2].to_i, unmmapped: fields[3].to_i}
+    end
+    stats_file.close
+    stats.delete('*')
+    @idxstats << stats
+  end
+
+
+  private
+
+  def get_coverage_parameters(seq_name, contig_length, mapped_reads, mpileup_info, options, coverage_results)
+    # begin
+      mean_normalized_differences = 0
+      mean_max = 0
+      mean_coverage = 0
+      proportion_sequence_mapped = 0
+      fpkm = 0
+      if mapped_reads > 0
+        if !mpileup_info.empty?
+          if mpileup_info.length == 1
+            coverages = mpileup_info.first
+          else
+            coverages = mpileup_info.transpose.map {|x| x.reduce(:+)}
+          end
+          mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm = calculate_coverage_parameters(coverages, contig_length, mapped_reads, options)
+        end
+      end
+      record = [mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm, mapped_reads]
+
+      record << coverages if options[:add_coverages]
+      coverage_results[seq_name] = record
+    # rescue Exception => e
+    #   puts "ERROR: The reference sequence: #{seq_name} has failed",
+    #     e.message,
+    #     e.backtrace.join("\n")
+    # end
+  end
+
+  def calculate_coverage_parameters(coverages, ref_length, mapped_reads, options)
+    n_mates = 1.0
+    n_mates = 2.0 if @paired
+    millions = @total_reads.inject { |sum, n| sum + n }/1.0e6
+    mean_normalized_differences = 0
+    mean_max = 0
+    mean_coverage = 0
+    proportion_sequence_mapped = 0
+    fpkm = 0
+
+    greater0 = coverages.select{|c| c > 0}
+    coverages_greater0 = greater0.length
+    if coverages_greater0 > 0
+      fpkm = mapped_reads/n_mates/(ref_length/1000.0)/millions
+      mean_coverage = coverages.inject { |sum, n| sum + n }.fdiv(ref_length)
+      n_max = (coverages.length/10.0).ceil
+      maximums = coverages.sort{|c1, c2| c2 <=> c1}[0..n_max-1]
+      mean_max = maximums.inject { |sum, n| sum + n }.fdiv(n_max)

+      mean_coverage_filtered = greater0.inject { |sum, n| sum + n }.fdiv(coverages_greater0)
+      normalized_differences = greater0.map{|c| (c - mean_coverage_filtered).abs/mean_coverage_filtered}
+      mean_normalized_differences = normalized_differences.inject { |sum, n| sum + n } / normalized_differences.length
+      proportion_sequence_mapped = greater0.length.fdiv(ref_length)
+
+      if options[:normalize_coverages]
+        max = coverages.max
+        coverages.map!{|cov| cov.fdiv(max) }
+      end
+    end
+    return mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm
+  end
+
+end
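Taken together, the new Mapping class is meant to be driven as: build the bowtie2 index (do_ref), map each read set (do_map), index the resulting BAMs, collect idxstats, then run mpileup to get per-contig coverage statistics, with FPKM computed as mapped_reads / n_mates / (length / 1000) / (total reads in millions). A hedged usage sketch, where the temp folder, reference FASTA and read file names are purely illustrative:

  # Sketch only: 'transcripts.fasta' and the FASTQ names are hypothetical.
  require 'fileutils'

  temp = 'fln_map_temp'
  FileUtils.mkdir_p(temp)

  mapping = Mapping.new(
    ref_fasta_path: 'transcripts.fasta',
    temp_folder: temp,
    threads: 4
  )
  mapping.do_ref                                              # bowtie2-build (skipped if *.bt2 files exist)
  mapping.do_map(files: [['reads_1.fastq', 'reads_2.fastq']]) # one paired-end library
  mapping.index                                               # samtools index per BAM
  mapping.idxstats                                            # per-contig mapped read counts
  coverage = mapping.mpileup('--ff' => 'UNMAP,QCFAIL')
  # coverage['contig_1'] #=> [mean_normalized_differences, mean_max, mean_coverage,
  #                           proportion_sequence_mapped, fpkm, mapped_reads]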
data/lib/full_lengther_next/my_worker.rb:

@@ -10,6 +10,7 @@ require 'artifacts'
 require 'blast_functions'
 require 'exonerate_result'
 require 'scbi_fasta'
+require 'mapping'

 require 'fl_analysis'
 include FlAnalysis
@@ -28,17 +29,67 @@ class MyWorker < ScbiMapreduce::Worker
     $verbose = manager_options[:verbose]
   end

-  def process_object(
-
-
-
-
+  def process_object(initial_obj)
+    task = initial_obj.first.keys.first
+    obj = nil
+    if task == :fln
+      obj = initial_obj.map{|hash| hash[:fln]}
+      # FLN starting point
+      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{obj.first.seq_name}"
+      full_lenghter2(obj)
+    elsif task == :mapping
+      obj = initial_obj.first.values.first
+      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{obj}"
+      obj = map_transcriptome(obj)
+    end
+    return {task => obj}
   end

   def closing_worker

   end

+  #####################################################################################
+  # MAPPING METHODS
+  #####################################################################################
+  def map_transcriptome(initial_obj)
+    ref_file = initial_obj
+    prefix = File.basename(ref_file, '.fasta')
+
+
+    mapping2 = Mapping.new(
+      ref_fasta_path: File.join(@options[:temp_map_folder], ref_file),
+      threads: 1,
+      temp_folder: @options[:temp_map_folder]
+    )
+    $WORKER_LOG.info "Do bowtie ref"
+
+    mapping2.do_ref(
+      name: "#{prefix}_ref",
+      log: "#{prefix}_reference_log"
+    )
+
+    $WORKER_LOG.info "Do bowtie mapping"
+    mapping2.do_map(
+      files: @options[:files2map],
+      command: 'bowtie2 -p /THREADS/ -x /REFERENCE/ -a',
+      paired_pipe: '| samtools view -bS -f 2 | samtools sort -o /OUTPUT/',
+      single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      additional_paired_flags: '--no-mixed',
+      output: File.join(@options[:temp_map_folder], "#{prefix}_map_data"),
+      log: File.join(@options[:temp_map_folder], "#{prefix}_mapping_log"),
+    )
+
+    $WORKER_LOG.info "Do samtools ref"
+    mapping2.index
+    $WORKER_LOG.info "Do idxstats"
+    mapping2.idxstats
+    $WORKER_LOG.info "Do mpileup and coverage analysis"
+    map_object2 = mapping2.mpileup('--ff' => 'UNMAP,QCFAIL')
+    $WORKER_LOG.info "Finished coverage analysis"
+    return map_object2
+  end
+
   #####################################################################################
   # FLN FUNCTIONS
   #####################################################################################
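As inferred from the rewritten process_object, each chunk arriving at the worker is an array of single-key hashes whose first key names the task, so the manager is expected to send shapes like the following (the sequence objects and file name are hypothetical):

  fln_chunk     = [{ fln: seq_a }, { fln: seq_b }]             # Sequence objects to annotate
  mapping_chunk = [{ mapping: 'transcriptome_chunk_0.fasta' }] # basename resolved inside temp_map_folder

  # process_object(fln_chunk)     #=> { fln: [processed sequences] }
  # process_object(mapping_chunk) #=> { mapping: coverage_results_hash }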
@@ -49,15 +100,18 @@ class MyWorker < ScbiMapreduce::Worker

   def full_lenghter2(seqs)
     #seqs.map{|seq| seq.change_degenerated_nt!} # Clean degenerated nt
-
+    check_seqs = seqs
+    if !@options[:files2map].empty? && @options[:remove_unmapped]
+      check_seqs = check_mapping(seqs)
+    end
+
     # User database
     #--------------------------------------------
     # if the user has included his own database in the parameters entry,
     # the location of the database is tested, and blast and the results analysis is done
-    check_seqs = seqs
     if @options[:user_db]
       user_db = File.basename(@options[:user_db])
-      check_seqs = check_prot_db(
+      check_seqs = check_prot_db(check_seqs, @options[:user_db], 'blastx', 1, user_db, @options[:blast])
     end

   # UniProt (sp)
@@ -95,6 +149,14 @@ class MyWorker < ScbiMapreduce::Worker
   #----------------------------------------------------------------------------------
   # END MAIN
   #----------------------------------------------------------------------------------
+  def check_mapping(seqs)
+    new_seqs = []
+    seqs.each do |s|
+      artifact?(s, nil, 'mapping', '', @options, new_seqs)
+    end
+    seqs.concat(new_seqs)
+    return seqs.select{|s| !s.ignore }
+  end

   def check_prot_db(seqs, db_path, blast_type, evalue, db_name, additional_blast_options)

@@ -136,7 +198,7 @@ class MyWorker < ScbiMapreduce::Worker
     $WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
     blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
     chunk_name = input.first.seq_name.gsub(/\W+/,'_')
-    file_path = File.join(
+    file_path = File.join(@options[:temp], File.basename(database)+'_'+chunk_name)
     if @options[:hdd] #Write/parse blast on Disk
       file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
       if !File.exists?(file_name)
|