seqtrimnext 2.0.51 → 2.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/Manifest.txt +3 -3
- data/README.rdoc +18 -3
- data/Rakefile +2 -1
- data/bin/parse_params.rb +5 -1
- data/bin/seqtrimnext +53 -21
- data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
- data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
- data/lib/seqtrimnext/classes/params.rb +109 -123
- data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
- data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
- data/lib/seqtrimnext/classes/sequence.rb +2 -2
- data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
- data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
- data/lib/seqtrimnext/plugins/plugin.rb +42 -12
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
- data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
- data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
- data/lib/seqtrimnext/templates/amplicons.txt +1 -8
- data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
- data/lib/seqtrimnext/templates/only_quality.txt +24 -0
- data/lib/seqtrimnext/templates/sanger.txt +25 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +20 -7
- data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -11,20 +11,20 @@ require "scbi_math"
|
|
11
11
|
|
12
12
|
class ExtractStats
|
13
13
|
|
14
|
-
def initialize(
|
14
|
+
def initialize(sequence_readers,params)
|
15
15
|
|
16
16
|
@sequence_lengths = [] #array of sequences lengths
|
17
17
|
@length_frequency = [] #number of sequences of each size (frequency)
|
18
18
|
@keys={} #found keys
|
19
19
|
@params = params
|
20
|
-
@use_qual=
|
20
|
+
@use_qual=sequence_readers.first.with_qual?
|
21
21
|
# @params.get_param('use_qual')
|
22
22
|
|
23
23
|
@totalnt=0
|
24
24
|
@qv=[]
|
25
25
|
|
26
26
|
|
27
|
-
@sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(
|
27
|
+
@sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_readers)
|
28
28
|
|
29
29
|
|
30
30
|
set_params_and_results
|
@@ -37,30 +37,33 @@ class ExtractStats
|
|
37
37
|
|
38
38
|
end
|
39
39
|
|
40
|
-
def extract_stats_from_sequences(
|
40
|
+
def extract_stats_from_sequences(sequence_readers)
|
41
|
+
sequence_readers.each do |sequence_reader|
|
41
42
|
|
42
|
-
sequence_reader.each do |name_seq,fasta_seq,qual|
|
43
|
-
l = fasta_seq.length
|
44
43
|
|
45
|
-
|
44
|
+
sequence_reader.each do |name_seq,fasta_seq,qual|
|
45
|
+
l = fasta_seq.length
|
46
46
|
|
47
|
-
|
48
|
-
@sequence_lengths.push l
|
47
|
+
@totalnt+=l
|
49
48
|
|
50
|
-
|
51
|
-
|
49
|
+
#save all lengths
|
50
|
+
@sequence_lengths.push l
|
52
51
|
|
53
|
-
|
54
|
-
|
52
|
+
# add key value
|
53
|
+
add_key(fasta_seq[0..3].upcase)
|
55
54
|
|
56
|
-
|
57
|
-
|
55
|
+
# add fasta length
|
56
|
+
@length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
#extract qv values
|
59
|
+
extract_qv_from_sequence(qual) if @use_qual
|
60
|
+
|
61
|
+
# print some progress info
|
62
|
+
if (sequence_reader.num_seqs % 10000==0)
|
63
|
+
puts "Calculating stats: #{sequence_reader.num_seqs}"
|
64
|
+
end
|
63
65
|
|
66
|
+
end
|
64
67
|
end
|
65
68
|
|
66
69
|
length_stats = ScbiNArray.to_na(@sequence_lengths)
|
@@ -74,9 +77,9 @@ class ExtractStats
|
|
74
77
|
|
75
78
|
## PLOT RESULTS
|
76
79
|
if !File.exists?('graphs')
|
77
|
-
|
80
|
+
Dir.mkdir('graphs')
|
78
81
|
end
|
79
|
-
|
82
|
+
|
80
83
|
|
81
84
|
x = []
|
82
85
|
y = []
|
@@ -107,10 +110,10 @@ class ExtractStats
|
|
107
110
|
def plot_qualities
|
108
111
|
|
109
112
|
if !File.exists?('graphs')
|
110
|
-
|
113
|
+
Dir.mkdir('graphs')
|
111
114
|
end
|
112
115
|
minimum_qual_value = @params.get_param('min_quality').to_i
|
113
|
-
|
116
|
+
|
114
117
|
# get qualities values
|
115
118
|
x=[]
|
116
119
|
y=[]
|
@@ -127,7 +127,7 @@ class Params
|
|
127
127
|
|
128
128
|
# line doesn't finish in *
|
129
129
|
if (line[0]!='>'[0]) && (!(line =~ /\*$/))
|
130
|
-
|
130
|
+
|
131
131
|
#puts line
|
132
132
|
# puts line,line[0]
|
133
133
|
if line =~ />([^\.]+)\.\.\.\s/
|
@@ -189,12 +189,12 @@ class Params
|
|
189
189
|
|
190
190
|
def get_fasta(list,name,type)
|
191
191
|
res = list[name]
|
192
|
-
|
192
|
+
|
193
193
|
if res.nil?
|
194
194
|
$LOG.error("Error. The #{type}: #{name} was not correctly loaded")
|
195
195
|
raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
|
196
196
|
end
|
197
|
-
|
197
|
+
|
198
198
|
return res
|
199
199
|
end
|
200
200
|
|
@@ -231,165 +231,151 @@ class Params
|
|
231
231
|
line = Regexp.last_match[2].to_i
|
232
232
|
method = Regexp.last_match[3]
|
233
233
|
plugin=File.basename(file,File.extname(file))
|
234
|
-
|
235
|
-
# puts "CALLER: #{plugin}"
|
236
|
-
# puts [file, line, method]
|
237
234
|
|
238
235
|
end
|
239
236
|
|
240
237
|
end
|
241
238
|
|
242
|
-
def set_param(param,value,comment=nil)
|
243
|
-
|
239
|
+
def set_param(param,value,comment = nil)
|
240
|
+
plugin=get_plugin
|
244
241
|
|
245
|
-
|
242
|
+
@params[param] = value
|
246
243
|
|
247
|
-
|
248
|
-
|
249
|
-
|
244
|
+
if get_comment(plugin,param).nil?
|
245
|
+
set_comment(plugin,param,comment)
|
246
|
+
end
|
250
247
|
|
251
248
|
|
252
|
-
|
253
|
-
|
254
|
-
# def set_order(plugin,param)
|
255
|
-
#
|
256
|
-
# if @param_order[plugin].nil?
|
257
|
-
# @param_order[plugin]=[]
|
258
|
-
# end
|
259
|
-
#
|
260
|
-
# if !@param_order[plugin].index(param)
|
261
|
-
# @param_order[plugin].push param
|
262
|
-
# end
|
263
|
-
# end
|
264
|
-
|
249
|
+
end
|
265
250
|
|
266
251
|
def get_comment(plugin,param)
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
252
|
+
res = nil
|
253
|
+
if @plugin_comments[plugin]
|
254
|
+
res =@plugin_comments[plugin][param]
|
255
|
+
end
|
256
|
+
return res
|
257
|
+
end
|
273
258
|
|
274
259
|
|
275
260
|
def set_comment(plugin,param,comment)
|
276
|
-
|
277
|
-
|
278
|
-
|
261
|
+
if !comment.is_a?(Array) && !comment.nil?
|
262
|
+
comment=comment.split("\n").compact.map{|l| l.strip}
|
263
|
+
end
|
279
264
|
|
280
|
-
|
281
|
-
|
282
|
-
|
265
|
+
if @plugin_comments[plugin].nil?
|
266
|
+
@plugin_comments[plugin]={}
|
267
|
+
end
|
283
268
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
269
|
+
old_comment=''
|
270
|
+
# remove from other plugins
|
271
|
+
@plugin_comments.each do |plugin_name,comments|
|
272
|
+
if comments.keys.include?(param) && plugin_name!=plugin
|
273
|
+
old_comment=comments[param]
|
274
|
+
comments.delete(param)
|
275
|
+
end
|
276
|
+
end
|
292
277
|
|
293
|
-
|
294
|
-
|
295
|
-
|
278
|
+
if comment.nil?
|
279
|
+
comment=old_comment
|
280
|
+
end
|
296
281
|
|
297
|
-
|
298
|
-
|
299
|
-
|
282
|
+
# @comments[param]=(comment || [''])
|
283
|
+
@plugin_comments[plugin][param]=(comment || [''])
|
284
|
+
# puts @plugin_comments.keys.to_json
|
300
285
|
|
301
|
-
|
286
|
+
# remove empty comments
|
302
287
|
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
288
|
+
@plugin_comments.reverse_each do |plugin_name,comments|
|
289
|
+
if comments.empty?
|
290
|
+
@plugin_comments.delete(plugin_name)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
end
|
308
295
|
|
309
|
-
end
|
310
|
-
|
311
296
|
|
312
297
|
def set_mid(param,value)
|
313
|
-
|
314
|
-
|
315
|
-
#attr_accessor :h # to accede to the atribute 'h' from out of this class
|
298
|
+
@mids[param] = value
|
299
|
+
end
|
316
300
|
|
317
301
|
# Returns true if the parameter exists and false if it doesn't
|
318
302
|
def exists?(param_name)
|
319
|
-
|
320
|
-
|
321
|
-
|
303
|
+
return !@params[param_name].nil?
|
304
|
+
end
|
305
|
+
|
322
306
|
def check_plugin_list_param(errors,param_name)
|
323
|
-
|
324
|
-
|
307
|
+
# get plugin list
|
308
|
+
pl_list=get_param(param_name)
|
325
309
|
|
326
|
-
|
327
|
-
|
310
|
+
# puts pl_list,param_name
|
311
|
+
list=pl_list.split(',')
|
328
312
|
|
329
|
-
|
313
|
+
list.map!{|e| e.strip}
|
330
314
|
|
331
|
-
|
315
|
+
# puts "Lista:",list.join(',')
|
332
316
|
|
333
317
|
|
334
|
-
|
335
|
-
|
336
|
-
|
318
|
+
# always keep PluginExtractInserts at the end of the list
|
319
|
+
list.delete('PluginExtractInserts')
|
320
|
+
list << 'PluginExtractInserts'
|
337
321
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
322
|
+
set_param(param_name,list.join(','))
|
323
|
+
# if !list.include?('PluginExtractInserts')
|
324
|
+
# raise "PluginExtractInserts do not exists"
|
325
|
+
#
|
326
|
+
# end
|
343
327
|
|
344
328
|
|
345
329
|
|
346
|
-
|
330
|
+
end
|
347
331
|
|
348
332
|
# def split_databases(db_param_name)
|
349
333
|
def check_db_param(errors,db_param_name)
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
334
|
+
if !get_param(db_param_name).empty?
|
335
|
+
# expand database paths
|
336
|
+
dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
|
337
|
+
# puts "ALGO"*20
|
338
|
+
# puts "INPUT DATABASES:\n"+dbs.join(',')
|
339
|
+
|
340
|
+
procesed_dbs=[]
|
341
|
+
#
|
342
|
+
# TODO - check here that the db is not empty and that it is formatted.
|
343
|
+
dbs.reverse_each {|db_p|
|
344
|
+
db=File.expand_path(db_p)
|
345
|
+
|
346
|
+
if !File.exists?(db)
|
347
|
+
path=File.join($FORMATTED_DB_PATH,db_p)
|
348
|
+
else
|
349
|
+
path=db
|
350
|
+
end
|
351
|
+
|
352
|
+
|
353
|
+
if Dir.glob(path+'*.n*').entries.empty?
|
354
|
+
puts "DB file #{path} not formatted"
|
355
|
+
|
356
|
+
if File.writable_real?(path)
|
357
|
+
cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
|
358
|
+
system(cmd)
|
359
|
+
else
|
360
|
+
raise "Can't format database. We don't have write permissions in: #{path}"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
procesed_dbs << path
|
365
|
+
|
366
|
+
if !File.exists?(path)
|
367
|
+
raise "DB File #{path} does not exists"
|
368
|
+
# exit
|
369
|
+
end
|
370
|
+
}
|
371
|
+
|
372
|
+
db_paths = '"'+procesed_dbs.join(' ')+'"'
|
373
|
+
|
374
|
+
set_param(db_param_name,db_paths)
|
375
|
+
|
376
|
+
puts "USED DATABASES\n"+db_paths
|
377
|
+
end
|
378
|
+
end
|
393
379
|
|
394
380
|
|
395
381
|
def self.generate_sample_params
|
@@ -29,7 +29,6 @@ class PluginManager
|
|
29
29
|
|
30
30
|
# keeps a list of rejected sequences
|
31
31
|
|
32
|
-
|
33
32
|
rejected_seqs = []
|
34
33
|
|
35
34
|
@plugin_names.each do |plugin_name|
|
@@ -51,11 +50,10 @@ class PluginManager
|
|
51
50
|
|
52
51
|
# Creates an instance of the respective plugin stored in "plugin_name", and associates it with the sequence 'seq'
|
53
52
|
plugin_class = Object.const_get(plugin_name)
|
54
|
-
|
53
|
+
|
55
54
|
plugin_execution=plugin_class.new(running_seqs,@params)
|
56
|
-
#puts plugin_name+':'+ plugin_execution.stats.to_json
|
57
|
-
running_seqs.stats[plugin_name] = plugin_execution.stats
|
58
55
|
|
56
|
+
running_seqs.stats[plugin_name] = plugin_execution.stats
|
59
57
|
|
60
58
|
# puts running_seqs.stats.to_json
|
61
59
|
plugin_execution=nil
|
@@ -175,22 +175,14 @@ class Seqtrim
|
|
175
175
|
# it is the server part
|
176
176
|
if !only_workers then
|
177
177
|
|
178
|
-
sequence_reader = nil
|
179
178
|
cd_hit_input_file = nil
|
180
179
|
|
181
180
|
# TODO - FIX seqtrim to not iterate two times over input, so STDIN can be used
|
182
|
-
|
181
|
+
sequence_readers=[]
|
182
|
+
|
183
183
|
# open sequence reader and expand input files paths
|
184
184
|
if options[:fastq]
|
185
185
|
|
186
|
-
if options[:fastq]=='-'
|
187
|
-
seqs_path = STDIN
|
188
|
-
else
|
189
|
-
seqs_path = File.expand_path(options[:fastq])
|
190
|
-
end
|
191
|
-
|
192
|
-
cd_hit_input_file = seqs_path
|
193
|
-
|
194
186
|
# choose fastq quality format
|
195
187
|
format=:sanger
|
196
188
|
|
@@ -203,13 +195,23 @@ class Seqtrim
|
|
203
195
|
format = :sanger
|
204
196
|
end
|
205
197
|
|
198
|
+
seqs_path=''
|
199
|
+
|
206
200
|
$LOG.info("Used FastQ format for input files: #{format}")
|
201
|
+
# iterate files
|
202
|
+
options[:fastq].each do |fastq_file|
|
203
|
+
|
204
|
+
if fastq_file=='-'
|
205
|
+
seqs_path = STDIN
|
206
|
+
else
|
207
|
+
seqs_path = File.expand_path(fastq_file)
|
208
|
+
end
|
209
|
+
|
210
|
+
sequence_readers << FastqFile.new(seqs_path,'r',format, true)
|
211
|
+
|
212
|
+
end
|
207
213
|
|
208
|
-
sequence_reader = FastqFile.new(seqs_path,'r',format, true)
|
209
|
-
# cd_hit_input_file = 'cd-hit-input.fasta'
|
210
214
|
cd_hit_input_file = seqs_path
|
211
|
-
# $LOG.info "Converting input file for cd-hit-454"
|
212
|
-
# $LOG.info "Conversion done"
|
213
215
|
|
214
216
|
else
|
215
217
|
|
@@ -217,7 +219,7 @@ class Seqtrim
|
|
217
219
|
cd_hit_input_file = seqs_path
|
218
220
|
|
219
221
|
qual_path = File.expand_path(options[:qual]) if qual_path
|
220
|
-
|
222
|
+
sequence_readers << FastaQualFile.new(options[:fasta],options[:qual],true)
|
221
223
|
|
222
224
|
end
|
223
225
|
|
@@ -250,12 +252,11 @@ class Seqtrim
|
|
250
252
|
$LOG.error "Plugin check failed"
|
251
253
|
|
252
254
|
# save used params to file
|
253
|
-
|
255
|
+
params.save_file('used_params.txt')
|
254
256
|
|
255
257
|
exit
|
256
258
|
end
|
257
259
|
|
258
|
-
|
259
260
|
if !Dir.exists?(OUTPUT_PATH)
|
260
261
|
Dir.mkdir(OUTPUT_PATH)
|
261
262
|
end
|
@@ -263,8 +264,7 @@ class Seqtrim
|
|
263
264
|
# Extract global stats
|
264
265
|
if params.get_param('generate_initial_stats')=='true'
|
265
266
|
$LOG.info "Calculatings stats"
|
266
|
-
|
267
|
-
ExtractStats.new(sequence_reader,params)
|
267
|
+
ExtractStats.new(sequence_readers,params)
|
268
268
|
else
|
269
269
|
$LOG.info "Skipping calculatings stats phase."
|
270
270
|
end
|
@@ -274,14 +274,6 @@ class Seqtrim
|
|
274
274
|
params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
|
275
275
|
|
276
276
|
piro_on = (params.get_param('next_generation_sequences')=='true')
|
277
|
-
|
278
|
-
# format blast database with truncated file
|
279
|
-
#MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
|
280
|
-
|
281
|
-
# leer mids
|
282
|
-
# params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
|
283
|
-
# params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
|
284
|
-
# params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
|
285
277
|
|
286
278
|
params.load_mids(params.get_param('mids_db'))
|
287
279
|
params.load_ab_adapters(params.get_param('adapters_ab_db'))
|
@@ -334,7 +326,7 @@ class Seqtrim
|
|
334
326
|
else
|
335
327
|
$LOG.info 'Starting server'
|
336
328
|
|
337
|
-
SeqtrimWorkManager.init_work_manager(
|
329
|
+
SeqtrimWorkManager.init_work_manager(sequence_readers, params,chunk_size,use_json,options[:skip_output])
|
338
330
|
|
339
331
|
begin
|
340
332
|
cpus=1
|
@@ -371,7 +363,10 @@ class Seqtrim
|
|
371
363
|
server.start_server
|
372
364
|
|
373
365
|
# close sequence reader
|
374
|
-
|
366
|
+
sequence_readers.each do |file|
|
367
|
+
file.close
|
368
|
+
end
|
369
|
+
|
375
370
|
$LOG.info 'Closing server'
|
376
371
|
end
|
377
372
|
|
@@ -10,6 +10,7 @@ class SequenceGroup
|
|
10
10
|
@seqs=seqs
|
11
11
|
@output_text={}
|
12
12
|
@output_files={}
|
13
|
+
|
13
14
|
end
|
14
15
|
|
15
16
|
|
@@ -31,6 +32,13 @@ class SequenceGroup
|
|
31
32
|
yield seq
|
32
33
|
end
|
33
34
|
end
|
35
|
+
|
36
|
+
def each_slice(n)
|
37
|
+
@seqs.each_slice(n) do |seqs|
|
38
|
+
yield seqs
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
34
42
|
|
35
43
|
def each_with_index
|
36
44
|
@seqs.each_with_index do |seq,i|
|
@@ -46,7 +54,19 @@ class SequenceGroup
|
|
46
54
|
end
|
47
55
|
|
48
56
|
def add(array)
|
49
|
-
@seqs
|
57
|
+
@seqs = @seqs + array
|
58
|
+
|
59
|
+
# sort by tuple_id and order in tuple
|
60
|
+
@seqs.sort! do |a,b|
|
61
|
+
comp = (a.tuple_id <=> b.tuple_id)
|
62
|
+
comp.zero? ? (a.order_in_tuple <=> b.order_in_tuple) : comp
|
63
|
+
end
|
64
|
+
|
65
|
+
# print
|
66
|
+
# @seqs.each do |s|
|
67
|
+
# puts "TID:#{s.tuple_id}, OIT: #{s.order_in_tuple}"
|
68
|
+
# end
|
69
|
+
|
50
70
|
end
|
51
71
|
|
52
72
|
def count
|
@@ -14,14 +14,13 @@ include Term::ANSIColor
|
|
14
14
|
class SequenceWithAction < Sequence
|
15
15
|
SHOW_QUAL = false
|
16
16
|
SHOW_FINAL_INSERTS=true
|
17
|
-
|
17
|
+
|
18
|
+
attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last, :order_in_tuple, :tuple_id, :tuple_size
|
18
19
|
|
19
20
|
# Creates an instance with the structure to store the actions associated with a sequence
|
20
21
|
def initialize(seq_name,seq_fasta,seq_qual, seq_comment = '')
|
21
22
|
super
|
22
|
-
|
23
|
-
#if @ns_present then $LOG.debug "The sequence #{seq_name} has N's" else $LOG.debug "The sequence #{seq_name} hasn't N's" end
|
24
|
-
#if @xs_present then $LOG.debug "The sequence #{seq_name} has X's" else $LOG.debug "The sequence #{seq_name} hasn't X's" end
|
23
|
+
|
25
24
|
@actions = []
|
26
25
|
@seq_fasta_orig = seq_fasta
|
27
26
|
@seq_fasta = seq_fasta
|
@@ -32,18 +31,31 @@ class SequenceWithAction < Sequence
|
|
32
31
|
@insert_start = 0
|
33
32
|
@insert_end = seq_fasta.length-1
|
34
33
|
|
35
|
-
|
36
|
-
#@seq_qual = seq_qual
|
37
|
-
|
38
34
|
@stats={}
|
35
|
+
@comments=[]
|
39
36
|
|
40
37
|
@file_tags=[]
|
41
38
|
|
39
|
+
# for paired ends
|
40
|
+
@order_in_tuple=0
|
41
|
+
@tuple_id=0
|
42
|
+
@tuple_size=0
|
43
|
+
@file_tag_tuple_priority=0
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def add_comment(comment)
|
48
|
+
@comments.push comment
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_comment_line
|
52
|
+
return ([@seq_rejected_by_message]+@comments).compact.join(';')
|
42
53
|
end
|
43
54
|
|
44
55
|
# add a file tag to sequence
|
45
|
-
def add_file_tag(tag_level, tag_value, tag_type)
|
56
|
+
def add_file_tag(tag_level, tag_value, tag_type, priority=0)
|
46
57
|
@file_tags<< {:level => tag_level, :name => tag_value, :type=> tag_type}
|
58
|
+
@file_tag_tuple_priority=priority
|
47
59
|
end
|
48
60
|
|
49
61
|
# join file tags into a path
|
@@ -71,7 +83,7 @@ class SequenceWithAction < Sequence
|
|
71
83
|
|
72
84
|
# puts "#{dirname}, #{filename}"
|
73
85
|
|
74
|
-
return [dirname,filename]
|
86
|
+
return [dirname,filename,@file_tag_tuple_priority]
|
75
87
|
|
76
88
|
end
|
77
89
|
|
@@ -328,16 +340,16 @@ class SequenceWithAction < Sequence
|
|
328
340
|
output_res=[]
|
329
341
|
|
330
342
|
if @seq_rejected
|
331
|
-
output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
|
343
|
+
output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
|
332
344
|
# puts @seq_name.bold + bold + ' REJECTED BECAUSE ' +@seq_rejected_by_message.bold if @seq_rejected
|
333
345
|
else
|
334
|
-
output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline
|
346
|
+
output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline
|
335
347
|
|
336
|
-
end
|
348
|
+
end
|
337
349
|
|
338
350
|
n=1
|
339
351
|
withMessage = ["ActionIsContaminated","ActionVectors","ActionBadAdapter","ActionLeftAdapter","ActionRightAdapter"]
|
340
|
-
color = red
|
352
|
+
color = red
|
341
353
|
|
342
354
|
@actions.sort!{|e,f| e.start_pos<=>f.start_pos}.each do |a|
|
343
355
|
a_type=a.action_type
|