full_lengther_next 0.6.2 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +6 -37
- data/bin/console +14 -0
- data/bin/download_fln_dbs.rb +2 -7
- data/bin/full_lengther_next +85 -6
- data/bin/make_user_db.rb +13 -5
- data/bin/setup +8 -0
- data/full_lengther_next.gemspec +42 -0
- data/lib/full_lengther_next.rb +2 -10
- data/lib/full_lengther_next/artifacts.rb +74 -0
- data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
- data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
- data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
- data/lib/full_lengther_next/fln_stats.rb +613 -0
- data/lib/full_lengther_next/go_methods.rb +42 -0
- data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
- data/lib/full_lengther_next/mapping.rb +296 -0
- data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
- data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
- data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
- data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
- data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
- data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
- data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
- data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
- data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
- data/lib/full_lengther_next/version.rb +3 -0
- data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
- data/report_templates/general_summary.erb +140 -0
- data/report_templates/mapping_summary.erb +98 -0
- data/report_templates/reptrans_summary.erb +32 -0
- metadata +112 -134
- data/.gemtest +0 -0
- data/History.txt +0 -32
- data/Manifest.txt +0 -44
- data/PostInstall.txt +0 -6
- data/bin/plot_fln.rb +0 -270
- data/bin/plot_taxonomy.rb +0 -70
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next/classes/artifacts.rb +0 -66
- data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_full_lengther_next.rb +0 -11
- data/test/test_helper.rb +0 -3
@@ -0,0 +1,42 @@
+def go_for_graph(sequences_by_ontologies, fpkm = {})
+  container = {}
+  go_data = [
+    [:function_go, 'F:'],
+    [:component_go, 'C:'],
+    [:process_go, 'P:']
+  ]
+
+  go_data.each do |key, prefix|
+    go_ontology = sequences_by_ontologies.select{|go, seq_ids| go =~ /^#{prefix}/}
+    go_names = []
+    go_vals = []
+    go_ontology.each do |go_name, seq_names|
+      go_label = go_name.gsub(prefix, '')
+      if fpkm.empty?
+        go_vals << seq_names.length
+        go_names << go_label
+      else
+        sum = seq_names.map{|seq_name| fpkm[seq_name].first }.inject { |sum, n| sum + n }
+        if sum > 0
+          go_vals << sum
+          go_names << go_label
+        end
+      end
+    end
+    go_table = []
+    go_names.each_with_index do |name, index|
+      go_table << [name, go_vals[index]]
+    end
+    go_table.sort!{|v1, v2| v2[1] <=> v1[1]}
+    go_table.unshift([key.to_s, 'GO'])
+    if !go_names.empty?
+      container[key] = go_table
+    else
+      container[key] = [
+        [key.to_s, 'GO'],
+        ['No_data', 1]
+      ]
+    end
+  end
+  return container
+end
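The new `go_for_graph` helper groups Gene Ontology annotations by ontology prefix (`F:`, `C:`, `P:`) and builds the label/value tables consumed by the report templates, weighting each GO term either by sequence count or, when an FPKM table is supplied, by summed expression. A minimal usage sketch with made-up sequence IDs and FPKM values (it assumes the gem's go_methods.rb has already been loaded; this snippet is not part of the package):

```ruby
# Hypothetical annotation input: GO term => sequences carrying it.
sequences_by_ontologies = {
  'F:ATP binding' => ['seq1', 'seq2'],
  'P:translation' => ['seq2'],
  'C:ribosome'    => ['seq1', 'seq2', 'seq3']
}

# Optional expression table: sequence name => [fpkm, ...].
fpkm = { 'seq1' => [10.0], 'seq2' => [2.5], 'seq3' => [0.0] }

by_count = go_for_graph(sequences_by_ontologies)        # values are sequence counts
by_fpkm  = go_for_graph(sequences_by_ontologies, fpkm)  # values are summed FPKM

# Each table starts with a [ontology, 'GO'] header and is sorted by value, e.g.
# by_count[:function_go] => [["function_go", "GO"], ["ATP binding", 2]]
```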
File without changes
@@ -0,0 +1,296 @@
+require 'scbi_zcat'
+
+class Mpileup
+  def initialize(file_path)
+    @mpileup_file = ScbiZcatFile.new(file_path)
+    @last_line = nil
+  end
+
+  def read_contig(contig_name, contig_length)
+    coverages = []
+    if !@last_line.nil?
+      if @last_line[0] != contig_name
+        return nil
+      else
+        coverages = initialize_contig(contig_length)
+      end
+    else
+      line = @mpileup_file.readline
+      if line.nil?
+        @last_line = nil
+        return nil
+      else
+        @last_line = line.chomp.split("\t")
+        if @last_line[0] != contig_name
+          return nil
+        else
+          coverages = initialize_contig(contig_length)
+        end
+      end
+    end
+
+    while !@mpileup_file.eof
+      fields = @mpileup_file.readline.chomp.split("\t")
+      contig = fields[0]
+      if contig == contig_name
+        coverages[fields[1].to_i-1] = fields[2].to_i
+      else
+        @last_line = fields
+        break
+      end
+    end
+    return coverages
+  end
+
+  def initialize_contig(contig_length)
+    coverages = Array.new(contig_length, 0)
+    coverages[@last_line[1].to_i-1] = @last_line[2].to_i
+    return coverages
+  end
+
+  def close
+    @mpileup_file.close
+  end
+end
+
+class Mapping
+  def initialize(user_options = {})
+
+    options = {
+      threads: 1,
+      total_reads: [],
+    }.merge!(user_options)
+    @ref_fasta_path = options[:ref_fasta_path]
+    @temp_folder = options[:temp_folder]
+    @threads = options[:threads]
+
+    @map_files = []
+    @paired = []
+    @idxstats = []
+    @mpileups = []
+    @coverage_results = {}
+    @total_reads = options[:total_reads]
+  end
+
+  def do_ref(user_options = {})
+    options = {
+      name: 'ref',
+      command: 'bowtie2-build -f --threads /THREADS/ /REF_FASTA/ /OUTPUT/',
+      log: 'reference_log',
+      force: false
+    }
+    options.merge!(user_options)
+    @ref = File.join(@temp_folder, options[:name])
+    cmd = options[:command].gsub('/THREADS/', @threads.to_s)
+    cmd.gsub!('/REF_FASTA/', @ref_fasta_path)
+    cmd.gsub!('/OUTPUT/', @ref)
+    cmd = cmd + " &> #{File.join(@temp_folder, options[:log])}"
+    system(cmd) if options[:force] || Dir.glob(@ref+'*.bt2').length == 0
+  end
+
+  def do_samtools_ref
+    cmd = "samtools faidx #{@ref_fasta_path}"
+    system(cmd) if !File.exists?(@ref_fasta_path + '.fai')
+  end
+
+  def do_map(user_options = {})
+    options = {
+      files: [],
+      command: 'bowtie2 -p /THREADS/ -x /REFERENCE/',
+      paired_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      flag_single: '-U',
+      flags_paired: ['-1', '-2'],
+      additional_paired_flags: '',
+      flag_output: '-S',
+      output: File.join(@temp_folder, 'map_data'),
+      log: File.join(@temp_folder, 'mapping_log'),
+      force: false
+    }
+    options.merge!(user_options)
+    options[:files].each_with_index do |read_files, map_process_id|
+      cmd = options[:command].gsub('/THREADS/', @threads.to_s)
+      cmd.gsub!('/REFERENCE/', @ref)
+      if read_files.length == 1
+        cmd = cmd + " #{options[:flag_single]} #{read_files.first}"
+        @paired << false
+      elsif read_files.length == 2
+        @paired << true
+        cmd = cmd + " #{options[:additional_paired_flags]} #{options[:flags_paired].first} #{read_files.first} #{options[:flags_paired].last} #{read_files.last}"
+      else
+        raise('Incorrect number of read files. Must be 1 (single) or 2 (paired).')
+      end
+      map_file = nil
+      if options[:paired_pipe].nil? || options[:single_pipe].nil?
+        map_file = options[:output] + "_#{map_process_id}" + '.sam'
+        cmd = cmd + " #{options[:flag_output]} #{map_file} &> #{options[:log]}_#{map_process_id}"
+      else
+        if @paired[map_process_id]
+          pipe = options[:paired_pipe]
+        else
+          pipe = options[:single_pipe]
+        end
+        map_file = options[:output] + "_#{map_process_id}" + '.bam'
+        cmd = cmd + " 2> #{options[:log]}_#{map_process_id} " + pipe.gsub('/OUTPUT/', map_file)
+      end
+      @map_files << map_file
+      system(cmd) if options[:force] || !File.exists?(map_file)
+      @total_reads << File.open("#{options[:log]}_#{map_process_id}").readlines.select{|line| /\d+ reads; of these:/ =~ line}.first.split(' ').first.to_i if File.exists?("#{options[:log]}_#{map_process_id}") && @total_reads[map_process_id].nil?
+      raise('ERROR: The mapping process has failed, please check the map folder into the temp folder') if @total_reads[map_process_id].nil? || @total_reads[map_process_id] == 0
+    end
+  end
+
+  def index(user_options = {})
+    @map_files.each do |map_file|
+      system("samtools index #{map_file}") if (map_file.include?('.bam') && !File.exists?(map_file+'.bai')) || user_options[:force]
+    end
+  end
+
+  def report
+    reports = []
+    @map_files.each do |map_file|
+      cmd = "samtools flagstat #{map_file}"
+      report = %x[#{cmd}].split("\n")
+      reports << report
+    end
+    return reports
+  end
+
+  def idxstats
+    @map_files.each_with_index do |map_file, map_process_id|
+      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
+      file_path = File.join(@temp_folder, "#{prefix}_idxstats_#{map_process_id}.gz")
+      cmd = "samtools idxstats #{map_file} | gzip - -f > #{file_path}"
+      system(cmd) if !File.exists?(file_path)
+      parse_idxstats(file_path)
+    end
+  end
+
+  def mpileup(user_options = {})
+    parse_options = {
+      add_coverages: false,
+      normalize_coverages: false,
+      cols: [1,2,4] # 1 based for cut
+    }
+    parse_options.merge!(user_options.delete(:parse_options)) if !user_options[:parse_options].nil?
+    opts = []
+    do_samtools_ref
+    user_options.each do |flag, value|
+      opts << [flag, value.to_s]
+    end
+
+    contig_list_file = File.join(@temp_folder, File.basename(@ref_fasta_path)+'.lst')
+    system("grep '>' #{@ref_fasta_path} | sed 's/>//g' > #{contig_list_file}") if !File.exists?(contig_list_file)
+    idxstats if @idxstats.empty?
+    cut = nil
+    cut = " |cut -f #{parse_options[:cols].join(',')}" if !parse_options[:cols].nil? && !parse_options[:cols].empty?
+    mpileup_files = []
+    @map_files.each_with_index do |map_file, map_process_id|
+      prefix = File.basename(map_file).gsub(/\.bam|\.sam|\.cram/, '')
+      file_path = File.join(@temp_folder, "#{prefix}_mpileup_#{map_process_id}.gz")
+      mpileup_files << file_path
+      cmd = "samtools mpileup -f #{@ref_fasta_path} #{opts.join(' ')} #{map_file}#{cut} | gzip - -f > #{file_path}"
+      system(cmd) if !File.exists?(file_path)
+    end
+    coverage_results = {}
+
+    parse_mpileup(mpileup_files, contig_list_file) do |contig_name, contig_length, coverages|
+      mapped_reads = @idxstats.map{|info| info[contig_name][:mapped]}.inject { |sum, n| sum + n }
+      get_coverage_parameters(contig_name, contig_length, mapped_reads, coverages, parse_options, coverage_results)
+    end
+    return coverage_results
+  end
+
+  def parse_mpileup(file_paths, contig_list_file)
+    last_contig = nil
+    mpileup_files = file_paths.map{|file_path| Mpileup.new(file_path)}
+    File.open(contig_list_file).each do |contig_name|
+      contig_name.chomp!
+      contig_length = @idxstats.first[contig_name][:length]
+      all_coverages = []
+      mpileup_files.each do |mpileup_file|
+        coverages = mpileup_file.read_contig(contig_name, contig_length)
+        all_coverages << coverages if !coverages.nil? && !coverages.empty?
+      end
+      yield(contig_name, contig_length, all_coverages)
+    end
+    mpileup_files.map{|mf| mf.close}
+  end
+
+
+  def parse_idxstats(file_path)
+    stats = {}
+    stats_file = ScbiZcatFile.new(file_path)
+    while !stats_file.eof
+      fields = stats_file.readline.chomp.split("\t")
+      stats[fields[0]] = {length: fields[1].to_i, mapped: fields[2].to_i, unmmapped: fields[3].to_i}
+    end
+    stats_file.close
+    stats.delete('*')
+    @idxstats << stats
+  end
+
+
+  private
+
+  def get_coverage_parameters(seq_name, contig_length, mapped_reads, mpileup_info, options, coverage_results)
+    # begin
+    mean_normalized_differences = 0
+    mean_max = 0
+    mean_coverage = 0
+    proportion_sequence_mapped = 0
+    fpkm = 0
+    if mapped_reads > 0
+      if !mpileup_info.empty?
+        if mpileup_info.length == 1
+          coverages = mpileup_info.first
+        else
+          coverages = mpileup_info.transpose.map {|x| x.reduce(:+)}
+        end
+        mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm = calculate_coverage_parameters(coverages, contig_length, mapped_reads, options)
+      end
+    end
+    record = [mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm, mapped_reads]
+
+    record << coverages if options[:add_coverages]
+    coverage_results[seq_name] = record
+    # rescue Exception => e
+    #   puts "ERROR: The reference sequence: #{seq_name} has failed",
+    #     e.message,
+    #     e.backtrace.join("\n")
+    # end
+  end
+
+  def calculate_coverage_parameters(coverages, ref_length, mapped_reads, options)
+    n_mates = 1.0
+    n_mates = 2.0 if @paired
+    millions = @total_reads.inject { |sum, n| sum + n }/1.0e6
+    mean_normalized_differences = 0
+    mean_max = 0
+    mean_coverage = 0
+    proportion_sequence_mapped = 0
+    fpkm = 0
+
+    greater0 = coverages.select{|c| c > 0}
+    coverages_greater0 = greater0.length
+    if coverages_greater0 > 0
+      fpkm = mapped_reads/n_mates/(ref_length/1000.0)/millions
+      mean_coverage = coverages.inject { |sum, n| sum + n }.fdiv(ref_length)
+      n_max = (coverages.length/10.0).ceil
+      maximums = coverages.sort{|c1, c2| c2 <=> c1}[0..n_max-1]
+      mean_max = maximums.inject { |sum, n| sum + n }.fdiv(n_max)
+
+      mean_coverage_filtered = greater0.inject { |sum, n| sum + n }.fdiv(coverages_greater0)
+      normalized_differences = greater0.map{|c| (c - mean_coverage_filtered).abs/mean_coverage_filtered}
+      mean_normalized_differences = normalized_differences.inject { |sum, n| sum + n } / normalized_differences.length
+      proportion_sequence_mapped = greater0.length.fdiv(ref_length)
+
+      if options[:normalize_coverages]
+        max = coverages.max
+        coverages.map!{|cov| cov.fdiv(max) }
+      end
+    end
+    return mean_normalized_differences, mean_max, mean_coverage, proportion_sequence_mapped, fpkm
+  end
+
+end
@@ -10,6 +10,7 @@ require 'artifacts'
 require 'blast_functions'
 require 'exonerate_result'
 require 'scbi_fasta'
+require 'mapping'
 
 require 'fl_analysis'
 include FlAnalysis
@@ -28,17 +29,67 @@ class MyWorker < ScbiMapreduce::Worker
     $verbose = manager_options[:verbose]
   end
 
-  def process_object(
-
-
-
-
+  def process_object(initial_obj)
+    task = initial_obj.first.keys.first
+    obj = nil
+    if task == :fln
+      obj = initial_obj.map{|hash| hash[:fln]}
+      # FLN starting point
+      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{obj.first.seq_name}"
+      full_lenghter2(obj)
+    elsif task == :mapping
+      obj = initial_obj.first.values.first
+      $WORKER_LOG.info "Task: #{task}. Processing chunk: #{obj}"
+      obj = map_transcriptome(obj)
+    end
+    return {task => obj}
   end
 
   def closing_worker
 
   end
 
+  #####################################################################################
+  # MAPPING METHODS
+  #####################################################################################
+  def map_transcriptome(initial_obj)
+    ref_file = initial_obj
+    prefix = File.basename(ref_file, '.fasta')
+
+
+    mapping2 = Mapping.new(
+      ref_fasta_path: File.join(@options[:temp_map_folder], ref_file),
+      threads: 1,
+      temp_folder: @options[:temp_map_folder]
+    )
+    $WORKER_LOG.info "Do bowtie ref"
+
+    mapping2.do_ref(
+      name: "#{prefix}_ref",
+      log: "#{prefix}_reference_log"
+    )
+
+    $WORKER_LOG.info "Do bowtie mapping"
+    mapping2.do_map(
+      files: @options[:files2map],
+      command: 'bowtie2 -p /THREADS/ -x /REFERENCE/ -a',
+      paired_pipe: '| samtools view -bS -f 2 | samtools sort -o /OUTPUT/',
+      single_pipe: '| samtools view -bS -F 4 | samtools sort -o /OUTPUT/',
+      additional_paired_flags: '--no-mixed',
+      output: File.join(@options[:temp_map_folder], "#{prefix}_map_data"),
+      log: File.join(@options[:temp_map_folder], "#{prefix}_mapping_log"),
+    )
+
+    $WORKER_LOG.info "Do samtools ref"
+    mapping2.index
+    $WORKER_LOG.info "Do idxstats"
+    mapping2.idxstats
+    $WORKER_LOG.info "Do mpileup and coverage analysis"
+    map_object2 = mapping2.mpileup('--ff' => 'UNMAP,QCFAIL')
+    $WORKER_LOG.info "Finished coverage analysis"
+    return map_object2
+  end
+
   #####################################################################################
   # FLN FUNCTIONS
   #####################################################################################
@@ -49,15 +100,18 @@ class MyWorker < ScbiMapreduce::Worker
 
   def full_lenghter2(seqs)
     #seqs.map{|seq| seq.change_degenerated_nt!} # Clean degenerated nt
-
+    check_seqs = seqs
+    if !@options[:files2map].empty? && @options[:remove_unmapped]
+      check_seqs = check_mapping(seqs)
+    end
+
     # User database
     #--------------------------------------------
     # if the user has included his own database in the parameters entry,
     # the location of the database is tested, and blast and the results analysis is done
-    check_seqs = seqs
     if @options[:user_db]
       user_db = File.basename(@options[:user_db])
-      check_seqs = check_prot_db(
+      check_seqs = check_prot_db(check_seqs, @options[:user_db], 'blastx', 1, user_db, @options[:blast])
     end
 
     # UniProt (sp)
@@ -95,6 +149,14 @@ class MyWorker < ScbiMapreduce::Worker
   #----------------------------------------------------------------------------------
   # END MAIN
   #----------------------------------------------------------------------------------
+  def check_mapping(seqs)
+    new_seqs = []
+    seqs.each do |s|
+      artifact?(s, nil, 'mapping', '', @options, new_seqs)
+    end
+    seqs.concat(new_seqs)
+    return seqs.select{|s| !s.ignore }
+  end
 
   def check_prot_db(seqs, db_path, blast_type, evalue, db_name, additional_blast_options)
 
@@ -136,7 +198,7 @@ class MyWorker < ScbiMapreduce::Worker
     $WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
     blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
     chunk_name = input.first.seq_name.gsub(/\W+/,'_')
-    file_path = File.join(
+    file_path = File.join(@options[:temp], File.basename(database)+'_'+chunk_name)
     if @options[:hdd] #Write/parse blast on Disk
       file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
       if !File.exists?(file_name)
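With the reworked `process_object`, each work chunk handed to the worker is an array of single-key hashes whose key selects the task, and the result comes back wrapped under the same key. A schematic of the two chunk shapes; the sequence stand-ins and the file name below are illustrative only, not the gem's real Sequence objects or chunk file names:

```ruby
# Stand-in for the gem's Sequence class, just to make the shapes concrete.
FakeSeq = Struct.new(:seq_name)

# FLN annotation chunk: every element is {fln: sequence_object}.
fln_chunk = [{ fln: FakeSeq.new('seq1') }, { fln: FakeSeq.new('seq2') }]

# Mapping chunk: a single element naming the FASTA slice to map.
map_chunk = [{ mapping: 'transcriptome_chunk_0.fasta' }]

# process_object dispatches on the first key and returns {task => result},
# i.e. {fln: annotated_seqs} or {mapping: coverage_results_hash}.
```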