seqtrimnext 2.0.51 → 2.0.52
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/Manifest.txt +3 -3
- data/README.rdoc +18 -3
- data/Rakefile +2 -1
- data/bin/parse_params.rb +5 -1
- data/bin/seqtrimnext +53 -21
- data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
- data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
- data/lib/seqtrimnext/classes/params.rb +109 -123
- data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
- data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
- data/lib/seqtrimnext/classes/sequence.rb +2 -2
- data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
- data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
- data/lib/seqtrimnext/plugins/plugin.rb +42 -12
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
- data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
- data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
- data/lib/seqtrimnext/templates/amplicons.txt +1 -8
- data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
- data/lib/seqtrimnext/templates/only_quality.txt +24 -0
- data/lib/seqtrimnext/templates/sanger.txt +25 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +20 -7
- data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -11,20 +11,20 @@ require "scbi_math"
|
|
11
11
|
|
12
12
|
class ExtractStats
|
13
13
|
|
14
|
-
def initialize(
|
14
|
+
def initialize(sequence_readers,params)
|
15
15
|
|
16
16
|
@sequence_lengths = [] #array of sequences lengths
|
17
17
|
@length_frequency = [] #number of sequences of each size (frequency)
|
18
18
|
@keys={} #found keys
|
19
19
|
@params = params
|
20
|
-
@use_qual=
|
20
|
+
@use_qual=sequence_readers.first.with_qual?
|
21
21
|
# @params.get_param('use_qual')
|
22
22
|
|
23
23
|
@totalnt=0
|
24
24
|
@qv=[]
|
25
25
|
|
26
26
|
|
27
|
-
@sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(
|
27
|
+
@sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_readers)
|
28
28
|
|
29
29
|
|
30
30
|
set_params_and_results
|
@@ -37,30 +37,33 @@ class ExtractStats
|
|
37
37
|
|
38
38
|
end
|
39
39
|
|
40
|
-
def extract_stats_from_sequences(
|
40
|
+
def extract_stats_from_sequences(sequence_readers)
|
41
|
+
sequence_readers.each do |sequence_reader|
|
41
42
|
|
42
|
-
sequence_reader.each do |name_seq,fasta_seq,qual|
|
43
|
-
l = fasta_seq.length
|
44
43
|
|
45
|
-
|
44
|
+
sequence_reader.each do |name_seq,fasta_seq,qual|
|
45
|
+
l = fasta_seq.length
|
46
46
|
|
47
|
-
|
48
|
-
@sequence_lengths.push l
|
47
|
+
@totalnt+=l
|
49
48
|
|
50
|
-
|
51
|
-
|
49
|
+
#save all lengths
|
50
|
+
@sequence_lengths.push l
|
52
51
|
|
53
|
-
|
54
|
-
|
52
|
+
# add key value
|
53
|
+
add_key(fasta_seq[0..3].upcase)
|
55
54
|
|
56
|
-
|
57
|
-
|
55
|
+
# add fasta length
|
56
|
+
@length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
#extract qv values
|
59
|
+
extract_qv_from_sequence(qual) if @use_qual
|
60
|
+
|
61
|
+
# print some progress info
|
62
|
+
if (sequence_reader.num_seqs % 10000==0)
|
63
|
+
puts "Calculating stats: #{sequence_reader.num_seqs}"
|
64
|
+
end
|
63
65
|
|
66
|
+
end
|
64
67
|
end
|
65
68
|
|
66
69
|
length_stats = ScbiNArray.to_na(@sequence_lengths)
|
@@ -74,9 +77,9 @@ class ExtractStats
|
|
74
77
|
|
75
78
|
## PLOT RESULTS
|
76
79
|
if !File.exists?('graphs')
|
77
|
-
|
80
|
+
Dir.mkdir('graphs')
|
78
81
|
end
|
79
|
-
|
82
|
+
|
80
83
|
|
81
84
|
x = []
|
82
85
|
y = []
|
@@ -107,10 +110,10 @@ class ExtractStats
|
|
107
110
|
def plot_qualities
|
108
111
|
|
109
112
|
if !File.exists?('graphs')
|
110
|
-
|
113
|
+
Dir.mkdir('graphs')
|
111
114
|
end
|
112
115
|
minimum_qual_value = @params.get_param('min_quality').to_i
|
113
|
-
|
116
|
+
|
114
117
|
# get qualities values
|
115
118
|
x=[]
|
116
119
|
y=[]
|
@@ -127,7 +127,7 @@ class Params
|
|
127
127
|
|
128
128
|
# line doesn't finish in *
|
129
129
|
if (line[0]!='>'[0]) && (!(line =~ /\*$/))
|
130
|
-
|
130
|
+
|
131
131
|
#puts line
|
132
132
|
# puts line,line[0]
|
133
133
|
if line =~ />([^\.]+)\.\.\.\s/
|
@@ -189,12 +189,12 @@ class Params
|
|
189
189
|
|
190
190
|
def get_fasta(list,name,type)
|
191
191
|
res = list[name]
|
192
|
-
|
192
|
+
|
193
193
|
if res.nil?
|
194
194
|
$LOG.error("Error. The #{type}: #{name} was not correctly loaded")
|
195
195
|
raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
|
196
196
|
end
|
197
|
-
|
197
|
+
|
198
198
|
return res
|
199
199
|
end
|
200
200
|
|
@@ -231,165 +231,151 @@ class Params
|
|
231
231
|
line = Regexp.last_match[2].to_i
|
232
232
|
method = Regexp.last_match[3]
|
233
233
|
plugin=File.basename(file,File.extname(file))
|
234
|
-
|
235
|
-
# puts "CALLER: #{plugin}"
|
236
|
-
# puts [file, line, method]
|
237
234
|
|
238
235
|
end
|
239
236
|
|
240
237
|
end
|
241
238
|
|
242
|
-
def set_param(param,value,comment=nil)
|
243
|
-
|
239
|
+
def set_param(param,value,comment = nil)
|
240
|
+
plugin=get_plugin
|
244
241
|
|
245
|
-
|
242
|
+
@params[param] = value
|
246
243
|
|
247
|
-
|
248
|
-
|
249
|
-
|
244
|
+
if get_comment(plugin,param).nil?
|
245
|
+
set_comment(plugin,param,comment)
|
246
|
+
end
|
250
247
|
|
251
248
|
|
252
|
-
|
253
|
-
|
254
|
-
# def set_order(plugin,param)
|
255
|
-
#
|
256
|
-
# if @param_order[plugin].nil?
|
257
|
-
# @param_order[plugin]=[]
|
258
|
-
# end
|
259
|
-
#
|
260
|
-
# if !@param_order[plugin].index(param)
|
261
|
-
# @param_order[plugin].push param
|
262
|
-
# end
|
263
|
-
# end
|
264
|
-
|
249
|
+
end
|
265
250
|
|
266
251
|
def get_comment(plugin,param)
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
252
|
+
res = nil
|
253
|
+
if @plugin_comments[plugin]
|
254
|
+
res =@plugin_comments[plugin][param]
|
255
|
+
end
|
256
|
+
return res
|
257
|
+
end
|
273
258
|
|
274
259
|
|
275
260
|
def set_comment(plugin,param,comment)
|
276
|
-
|
277
|
-
|
278
|
-
|
261
|
+
if !comment.is_a?(Array) && !comment.nil?
|
262
|
+
comment=comment.split("\n").compact.map{|l| l.strip}
|
263
|
+
end
|
279
264
|
|
280
|
-
|
281
|
-
|
282
|
-
|
265
|
+
if @plugin_comments[plugin].nil?
|
266
|
+
@plugin_comments[plugin]={}
|
267
|
+
end
|
283
268
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
269
|
+
old_comment=''
|
270
|
+
# remove from other plugins
|
271
|
+
@plugin_comments.each do |plugin_name,comments|
|
272
|
+
if comments.keys.include?(param) && plugin_name!=plugin
|
273
|
+
old_comment=comments[param]
|
274
|
+
comments.delete(param)
|
275
|
+
end
|
276
|
+
end
|
292
277
|
|
293
|
-
|
294
|
-
|
295
|
-
|
278
|
+
if comment.nil?
|
279
|
+
comment=old_comment
|
280
|
+
end
|
296
281
|
|
297
|
-
|
298
|
-
|
299
|
-
|
282
|
+
# @comments[param]=(comment || [''])
|
283
|
+
@plugin_comments[plugin][param]=(comment || [''])
|
284
|
+
# puts @plugin_comments.keys.to_json
|
300
285
|
|
301
|
-
|
286
|
+
# remove empty comments
|
302
287
|
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
288
|
+
@plugin_comments.reverse_each do |plugin_name,comments|
|
289
|
+
if comments.empty?
|
290
|
+
@plugin_comments.delete(plugin_name)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
end
|
308
295
|
|
309
|
-
end
|
310
|
-
|
311
296
|
|
312
297
|
def set_mid(param,value)
|
313
|
-
|
314
|
-
|
315
|
-
#attr_accessor :h # to accede to the atribute 'h' from out of this class
|
298
|
+
@mids[param] = value
|
299
|
+
end
|
316
300
|
|
317
301
|
# Returns true if the parameter exists and false otherwise
|
318
302
|
def exists?(param_name)
|
319
|
-
|
320
|
-
|
321
|
-
|
303
|
+
return !@params[param_name].nil?
|
304
|
+
end
|
305
|
+
|
322
306
|
def check_plugin_list_param(errors,param_name)
|
323
|
-
|
324
|
-
|
307
|
+
# get plugin list
|
308
|
+
pl_list=get_param(param_name)
|
325
309
|
|
326
|
-
|
327
|
-
|
310
|
+
# puts pl_list,param_name
|
311
|
+
list=pl_list.split(',')
|
328
312
|
|
329
|
-
|
313
|
+
list.map!{|e| e.strip}
|
330
314
|
|
331
|
-
|
315
|
+
# puts "Lista:",list.join(',')
|
332
316
|
|
333
317
|
|
334
|
-
|
335
|
-
|
336
|
-
|
318
|
+
# always the pluginExtractInserts at the end
|
319
|
+
list.delete('PluginExtractInserts')
|
320
|
+
list << 'PluginExtractInserts'
|
337
321
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
322
|
+
set_param(param_name,list.join(','))
|
323
|
+
# if !list.include?('PluginExtractInserts')
|
324
|
+
# raise "PluginExtractInserts do not exists"
|
325
|
+
#
|
326
|
+
# end
|
343
327
|
|
344
328
|
|
345
329
|
|
346
|
-
|
330
|
+
end
|
347
331
|
|
348
332
|
# def split_databases(db_param_name)
|
349
333
|
def check_db_param(errors,db_param_name)
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
334
|
+
if !get_param(db_param_name).empty?
|
335
|
+
# expand database paths
|
336
|
+
dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
|
337
|
+
# puts "ALGO"*20
|
338
|
+
# puts "INPUT DATABASES:\n"+dbs.join(',')
|
339
|
+
|
340
|
+
procesed_dbs=[]
|
341
|
+
#
|
342
|
+
# TODO - check here that the db is not empty and that it is formatted.
|
343
|
+
dbs.reverse_each {|db_p|
|
344
|
+
db=File.expand_path(db_p)
|
345
|
+
|
346
|
+
if !File.exists?(db)
|
347
|
+
path=File.join($FORMATTED_DB_PATH,db_p)
|
348
|
+
else
|
349
|
+
path=db
|
350
|
+
end
|
351
|
+
|
352
|
+
|
353
|
+
if Dir.glob(path+'*.n*').entries.empty?
|
354
|
+
puts "DB file #{path} not formatted"
|
355
|
+
|
356
|
+
if File.writable_real?(path)
|
357
|
+
cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
|
358
|
+
system(cmd)
|
359
|
+
else
|
360
|
+
raise "Can't format database. We don't have write permissions in: #{path}"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
procesed_dbs << path
|
365
|
+
|
366
|
+
if !File.exists?(path)
|
367
|
+
raise "DB File #{path} does not exists"
|
368
|
+
# exit
|
369
|
+
end
|
370
|
+
}
|
371
|
+
|
372
|
+
db_paths = '"'+procesed_dbs.join(' ')+'"'
|
373
|
+
|
374
|
+
set_param(db_param_name,db_paths)
|
375
|
+
|
376
|
+
puts "USED DATABASES\n"+db_paths
|
377
|
+
end
|
378
|
+
end
|
393
379
|
|
394
380
|
|
395
381
|
def self.generate_sample_params
|
@@ -29,7 +29,6 @@ class PluginManager
|
|
29
29
|
|
30
30
|
# keeps a list of rejected sequences
|
31
31
|
|
32
|
-
|
33
32
|
rejected_seqs = []
|
34
33
|
|
35
34
|
@plugin_names.each do |plugin_name|
|
@@ -51,11 +50,10 @@ class PluginManager
|
|
51
50
|
|
52
51
|
# Creates an instance of the respective plugin stored in "plugin_name", and associates it with the sequence 'seq'
|
53
52
|
plugin_class = Object.const_get(plugin_name)
|
54
|
-
|
53
|
+
|
55
54
|
plugin_execution=plugin_class.new(running_seqs,@params)
|
56
|
-
#puts plugin_name+':'+ plugin_execution.stats.to_json
|
57
|
-
running_seqs.stats[plugin_name] = plugin_execution.stats
|
58
55
|
|
56
|
+
running_seqs.stats[plugin_name] = plugin_execution.stats
|
59
57
|
|
60
58
|
# puts running_seqs.stats.to_json
|
61
59
|
plugin_execution=nil
|
@@ -175,22 +175,14 @@ class Seqtrim
|
|
175
175
|
# it is the server part
|
176
176
|
if !only_workers then
|
177
177
|
|
178
|
-
sequence_reader = nil
|
179
178
|
cd_hit_input_file = nil
|
180
179
|
|
181
180
|
# TODO - FIX seqtrim to not iterate two times over input, so STDIN can be used
|
182
|
-
|
181
|
+
sequence_readers=[]
|
182
|
+
|
183
183
|
# open sequence reader and expand input files paths
|
184
184
|
if options[:fastq]
|
185
185
|
|
186
|
-
if options[:fastq]=='-'
|
187
|
-
seqs_path = STDIN
|
188
|
-
else
|
189
|
-
seqs_path = File.expand_path(options[:fastq])
|
190
|
-
end
|
191
|
-
|
192
|
-
cd_hit_input_file = seqs_path
|
193
|
-
|
194
186
|
# choose fastq quality format
|
195
187
|
format=:sanger
|
196
188
|
|
@@ -203,13 +195,23 @@ class Seqtrim
|
|
203
195
|
format = :sanger
|
204
196
|
end
|
205
197
|
|
198
|
+
seqs_path=''
|
199
|
+
|
206
200
|
$LOG.info("Used FastQ format for input files: #{format}")
|
201
|
+
# iterate files
|
202
|
+
options[:fastq].each do |fastq_file|
|
203
|
+
|
204
|
+
if fastq_file=='-'
|
205
|
+
seqs_path = STDIN
|
206
|
+
else
|
207
|
+
seqs_path = File.expand_path(fastq_file)
|
208
|
+
end
|
209
|
+
|
210
|
+
sequence_readers << FastqFile.new(seqs_path,'r',format, true)
|
211
|
+
|
212
|
+
end
|
207
213
|
|
208
|
-
sequence_reader = FastqFile.new(seqs_path,'r',format, true)
|
209
|
-
# cd_hit_input_file = 'cd-hit-input.fasta'
|
210
214
|
cd_hit_input_file = seqs_path
|
211
|
-
# $LOG.info "Converting input file for cd-hit-454"
|
212
|
-
# $LOG.info "Conversion done"
|
213
215
|
|
214
216
|
else
|
215
217
|
|
@@ -217,7 +219,7 @@ class Seqtrim
|
|
217
219
|
cd_hit_input_file = seqs_path
|
218
220
|
|
219
221
|
qual_path = File.expand_path(options[:qual]) if qual_path
|
220
|
-
|
222
|
+
sequence_readers << FastaQualFile.new(options[:fasta],options[:qual],true)
|
221
223
|
|
222
224
|
end
|
223
225
|
|
@@ -250,12 +252,11 @@ class Seqtrim
|
|
250
252
|
$LOG.error "Plugin check failed"
|
251
253
|
|
252
254
|
# save used params to file
|
253
|
-
|
255
|
+
params.save_file('used_params.txt')
|
254
256
|
|
255
257
|
exit
|
256
258
|
end
|
257
259
|
|
258
|
-
|
259
260
|
if !Dir.exists?(OUTPUT_PATH)
|
260
261
|
Dir.mkdir(OUTPUT_PATH)
|
261
262
|
end
|
@@ -263,8 +264,7 @@ class Seqtrim
|
|
263
264
|
# Extract global stats
|
264
265
|
if params.get_param('generate_initial_stats')=='true'
|
265
266
|
$LOG.info "Calculatings stats"
|
266
|
-
|
267
|
-
ExtractStats.new(sequence_reader,params)
|
267
|
+
ExtractStats.new(sequence_readers,params)
|
268
268
|
else
|
269
269
|
$LOG.info "Skipping calculatings stats phase."
|
270
270
|
end
|
@@ -274,14 +274,6 @@ class Seqtrim
|
|
274
274
|
params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
|
275
275
|
|
276
276
|
piro_on = (params.get_param('next_generation_sequences')=='true')
|
277
|
-
|
278
|
-
# format blast database with truncated file
|
279
|
-
#MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
|
280
|
-
|
281
|
-
# leer mids
|
282
|
-
# params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
|
283
|
-
# params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
|
284
|
-
# params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
|
285
277
|
|
286
278
|
params.load_mids(params.get_param('mids_db'))
|
287
279
|
params.load_ab_adapters(params.get_param('adapters_ab_db'))
|
@@ -334,7 +326,7 @@ class Seqtrim
|
|
334
326
|
else
|
335
327
|
$LOG.info 'Starting server'
|
336
328
|
|
337
|
-
SeqtrimWorkManager.init_work_manager(
|
329
|
+
SeqtrimWorkManager.init_work_manager(sequence_readers, params,chunk_size,use_json,options[:skip_output])
|
338
330
|
|
339
331
|
begin
|
340
332
|
cpus=1
|
@@ -371,7 +363,10 @@ class Seqtrim
|
|
371
363
|
server.start_server
|
372
364
|
|
373
365
|
# close sequence reader
|
374
|
-
|
366
|
+
sequence_readers.each do |file|
|
367
|
+
file.close
|
368
|
+
end
|
369
|
+
|
375
370
|
$LOG.info 'Closing server'
|
376
371
|
end
|
377
372
|
|
@@ -10,6 +10,7 @@ class SequenceGroup
|
|
10
10
|
@seqs=seqs
|
11
11
|
@output_text={}
|
12
12
|
@output_files={}
|
13
|
+
|
13
14
|
end
|
14
15
|
|
15
16
|
|
@@ -31,6 +32,13 @@ class SequenceGroup
|
|
31
32
|
yield seq
|
32
33
|
end
|
33
34
|
end
|
35
|
+
|
36
|
+
def each_slice(n)
|
37
|
+
@seqs.each_slice(n) do |seqs|
|
38
|
+
yield seqs
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
34
42
|
|
35
43
|
def each_with_index
|
36
44
|
@seqs.each_with_index do |seq,i|
|
@@ -46,7 +54,19 @@ class SequenceGroup
|
|
46
54
|
end
|
47
55
|
|
48
56
|
def add(array)
|
49
|
-
@seqs
|
57
|
+
@seqs = @seqs + array
|
58
|
+
|
59
|
+
# sort by tuple_id and order in tuple
|
60
|
+
@seqs.sort! do |a,b|
|
61
|
+
comp = (a.tuple_id <=> b.tuple_id)
|
62
|
+
comp.zero? ? (a.order_in_tuple <=> b.order_in_tuple) : comp
|
63
|
+
end
|
64
|
+
|
65
|
+
# print
|
66
|
+
# @seqs.each do |s|
|
67
|
+
# puts "TID:#{s.tuple_id}, OIT: #{s.order_in_tuple}"
|
68
|
+
# end
|
69
|
+
|
50
70
|
end
|
51
71
|
|
52
72
|
def count
|
@@ -14,14 +14,13 @@ include Term::ANSIColor
|
|
14
14
|
class SequenceWithAction < Sequence
|
15
15
|
SHOW_QUAL = false
|
16
16
|
SHOW_FINAL_INSERTS=true
|
17
|
-
|
17
|
+
|
18
|
+
attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last, :order_in_tuple, :tuple_id, :tuple_size
|
18
19
|
|
19
20
|
# Creates an instance with the structure to store the actions associated with a sequence
|
20
21
|
def initialize(seq_name,seq_fasta,seq_qual, seq_comment = '')
|
21
22
|
super
|
22
|
-
|
23
|
-
#if @ns_present then $LOG.debug "The sequence #{seq_name} has N's" else $LOG.debug "The sequence #{seq_name} hasn't N's" end
|
24
|
-
#if @xs_present then $LOG.debug "The sequence #{seq_name} has X's" else $LOG.debug "The sequence #{seq_name} hasn't X's" end
|
23
|
+
|
25
24
|
@actions = []
|
26
25
|
@seq_fasta_orig = seq_fasta
|
27
26
|
@seq_fasta = seq_fasta
|
@@ -32,18 +31,31 @@ class SequenceWithAction < Sequence
|
|
32
31
|
@insert_start = 0
|
33
32
|
@insert_end = seq_fasta.length-1
|
34
33
|
|
35
|
-
|
36
|
-
#@seq_qual = seq_qual
|
37
|
-
|
38
34
|
@stats={}
|
35
|
+
@comments=[]
|
39
36
|
|
40
37
|
@file_tags=[]
|
41
38
|
|
39
|
+
# for paired ends
|
40
|
+
@order_in_tuple=0
|
41
|
+
@tuple_id=0
|
42
|
+
@tuple_size=0
|
43
|
+
@file_tag_tuple_priority=0
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def add_comment(comment)
|
48
|
+
@comments.push comment
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_comment_line
|
52
|
+
return ([@seq_rejected_by_message]+@comments).compact.join(';')
|
42
53
|
end
|
43
54
|
|
44
55
|
# add a file tag to sequence
|
45
|
-
def add_file_tag(tag_level, tag_value, tag_type)
|
56
|
+
def add_file_tag(tag_level, tag_value, tag_type, priority=0)
|
46
57
|
@file_tags<< {:level => tag_level, :name => tag_value, :type=> tag_type}
|
58
|
+
@file_tag_tuple_priority=priority
|
47
59
|
end
|
48
60
|
|
49
61
|
# join file tags into a path
|
@@ -71,7 +83,7 @@ class SequenceWithAction < Sequence
|
|
71
83
|
|
72
84
|
# puts "#{dirname}, #{filename}"
|
73
85
|
|
74
|
-
return [dirname,filename]
|
86
|
+
return [dirname,filename,@file_tag_tuple_priority]
|
75
87
|
|
76
88
|
end
|
77
89
|
|
@@ -328,16 +340,16 @@ class SequenceWithAction < Sequence
|
|
328
340
|
output_res=[]
|
329
341
|
|
330
342
|
if @seq_rejected
|
331
|
-
output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
|
343
|
+
output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
|
332
344
|
# puts @seq_name.bold + bold + ' REJECTED BECAUSE ' +@seq_rejected_by_message.bold if @seq_rejected
|
333
345
|
else
|
334
|
-
output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline
|
346
|
+
output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline
|
335
347
|
|
336
|
-
end
|
348
|
+
end
|
337
349
|
|
338
350
|
n=1
|
339
351
|
withMessage = ["ActionIsContaminated","ActionVectors","ActionBadAdapter","ActionLeftAdapter","ActionRightAdapter"]
|
340
|
-
color = red
|
352
|
+
color = red
|
341
353
|
|
342
354
|
@actions.sort!{|e,f| e.start_pos<=>f.start_pos}.each do |a|
|
343
355
|
a_type=a.action_type
|