seqtrimnext 2.0.51 → 2.0.52

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -11,20 +11,20 @@ require "scbi_math"
11
11
 
12
12
  class ExtractStats
13
13
 
14
- def initialize(sequence_reader,params)
14
+ def initialize(sequence_readers,params)
15
15
 
16
16
  @sequence_lengths = [] #array of sequences lengths
17
17
  @length_frequency = [] #number of sequences of each size (frequency)
18
18
  @keys={} #found keys
19
19
  @params = params
20
- @use_qual=sequence_reader.with_qual?
20
+ @use_qual=sequence_readers.first.with_qual?
21
21
  # @params.get_param('use_qual')
22
22
 
23
23
  @totalnt=0
24
24
  @qv=[]
25
25
 
26
26
 
27
- @sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_reader)
27
+ @sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_readers)
28
28
 
29
29
 
30
30
  set_params_and_results
@@ -37,30 +37,33 @@ class ExtractStats
37
37
 
38
38
  end
39
39
 
40
- def extract_stats_from_sequences(sequence_reader)
40
+ def extract_stats_from_sequences(sequence_readers)
41
+ sequence_readers.each do |sequence_reader|
41
42
 
42
- sequence_reader.each do |name_seq,fasta_seq,qual|
43
- l = fasta_seq.length
44
43
 
45
- @totalnt+=l
44
+ sequence_reader.each do |name_seq,fasta_seq,qual|
45
+ l = fasta_seq.length
46
46
 
47
- #save all lengths
48
- @sequence_lengths.push l
47
+ @totalnt+=l
49
48
 
50
- # add key value
51
- add_key(fasta_seq[0..3].upcase)
49
+ #save all lengths
50
+ @sequence_lengths.push l
52
51
 
53
- # add fasta length
54
- @length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
52
+ # add key value
53
+ add_key(fasta_seq[0..3].upcase)
55
54
 
56
- #extract qv values
57
- extract_qv_from_sequence(qual) if @use_qual
55
+ # add fasta length
56
+ @length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
58
57
 
59
- # print some progress info
60
- if (sequence_reader.num_seqs % 10000==0)
61
- puts "Calculating stats: #{sequence_reader.num_seqs}"
62
- end
58
+ #extract qv values
59
+ extract_qv_from_sequence(qual) if @use_qual
60
+
61
+ # print some progress info
62
+ if (sequence_reader.num_seqs % 10000==0)
63
+ puts "Calculating stats: #{sequence_reader.num_seqs}"
64
+ end
63
65
 
66
+ end
64
67
  end
65
68
 
66
69
  length_stats = ScbiNArray.to_na(@sequence_lengths)
@@ -74,9 +77,9 @@ class ExtractStats
74
77
 
75
78
  ## PLOT RESULTS
76
79
  if !File.exists?('graphs')
77
- Dir.mkdir('graphs')
80
+ Dir.mkdir('graphs')
78
81
  end
79
-
82
+
80
83
 
81
84
  x = []
82
85
  y = []
@@ -107,10 +110,10 @@ class ExtractStats
107
110
  def plot_qualities
108
111
 
109
112
  if !File.exists?('graphs')
110
- Dir.mkdir('graphs')
113
+ Dir.mkdir('graphs')
111
114
  end
112
115
  minimum_qual_value = @params.get_param('min_quality').to_i
113
-
116
+
114
117
  # get qualities values
115
118
  x=[]
116
119
  y=[]
@@ -127,7 +127,7 @@ class Params
127
127
 
128
128
  # line doesn't finish in *
129
129
  if (line[0]!='>'[0]) && (!(line =~ /\*$/))
130
-
130
+
131
131
  #puts line
132
132
  # puts line,line[0]
133
133
  if line =~ />([^\.]+)\.\.\.\s/
@@ -189,12 +189,12 @@ class Params
189
189
 
190
190
  def get_fasta(list,name,type)
191
191
  res = list[name]
192
-
192
+
193
193
  if res.nil?
194
194
  $LOG.error("Error. The #{type}: #{name} was not correctly loaded")
195
195
  raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
196
196
  end
197
-
197
+
198
198
  return res
199
199
  end
200
200
 
@@ -231,165 +231,151 @@ class Params
231
231
  line = Regexp.last_match[2].to_i
232
232
  method = Regexp.last_match[3]
233
233
  plugin=File.basename(file,File.extname(file))
234
-
235
- # puts "CALLER: #{plugin}"
236
- # puts [file, line, method]
237
234
 
238
235
  end
239
236
 
240
237
  end
241
238
 
242
- def set_param(param,value,comment=nil)
243
- plugin=get_plugin
239
+ def set_param(param,value,comment = nil)
240
+ plugin=get_plugin
244
241
 
245
- @params[param] = value
242
+ @params[param] = value
246
243
 
247
- if get_comment(plugin,param).nil?
248
- set_comment(plugin,param,comment)
249
- end
244
+ if get_comment(plugin,param).nil?
245
+ set_comment(plugin,param,comment)
246
+ end
250
247
 
251
248
 
252
- end
253
-
254
- # def set_order(plugin,param)
255
- #
256
- # if @param_order[plugin].nil?
257
- # @param_order[plugin]=[]
258
- # end
259
- #
260
- # if !@param_order[plugin].index(param)
261
- # @param_order[plugin].push param
262
- # end
263
- # end
264
-
249
+ end
265
250
 
266
251
  def get_comment(plugin,param)
267
- res = nil
268
- if @plugin_comments[plugin]
269
- res =@plugin_comments[plugin][param]
270
- end
271
- return res
272
- end
252
+ res = nil
253
+ if @plugin_comments[plugin]
254
+ res =@plugin_comments[plugin][param]
255
+ end
256
+ return res
257
+ end
273
258
 
274
259
 
275
260
  def set_comment(plugin,param,comment)
276
- if !comment.is_a?(Array) && !comment.nil?
277
- comment=comment.split("\n").compact.map{|l| l.strip}
278
- end
261
+ if !comment.is_a?(Array) && !comment.nil?
262
+ comment=comment.split("\n").compact.map{|l| l.strip}
263
+ end
279
264
 
280
- if @plugin_comments[plugin].nil?
281
- @plugin_comments[plugin]={}
282
- end
265
+ if @plugin_comments[plugin].nil?
266
+ @plugin_comments[plugin]={}
267
+ end
283
268
 
284
- old_comment=''
285
- # remove from other plugins
286
- @plugin_comments.each do |plugin_name,comments|
287
- if comments.keys.include?(param) && plugin_name!=plugin
288
- old_comment=comments[param]
289
- comments.delete(param)
290
- end
291
- end
269
+ old_comment=''
270
+ # remove from other plugins
271
+ @plugin_comments.each do |plugin_name,comments|
272
+ if comments.keys.include?(param) && plugin_name!=plugin
273
+ old_comment=comments[param]
274
+ comments.delete(param)
275
+ end
276
+ end
292
277
 
293
- if comment.nil?
294
- comment=old_comment
295
- end
278
+ if comment.nil?
279
+ comment=old_comment
280
+ end
296
281
 
297
- # @comments[param]=(comment || [''])
298
- @plugin_comments[plugin][param]=(comment || [''])
299
- # puts @plugin_comments.keys.to_json
282
+ # @comments[param]=(comment || [''])
283
+ @plugin_comments[plugin][param]=(comment || [''])
284
+ # puts @plugin_comments.keys.to_json
300
285
 
301
- # remove empty comments
286
+ # remove empty comments
302
287
 
303
- @plugin_comments.reverse_each do |plugin_name,comments|
304
- if comments.empty?
305
- @plugin_comments.delete(plugin_name)
306
- end
307
- end
288
+ @plugin_comments.reverse_each do |plugin_name,comments|
289
+ if comments.empty?
290
+ @plugin_comments.delete(plugin_name)
291
+ end
292
+ end
293
+
294
+ end
308
295
 
309
- end
310
-
311
296
 
312
297
  def set_mid(param,value)
313
- @mids[param] = value
314
- end
315
- #attr_accessor :h # to accede to the atribute 'h' from out of this class
298
+ @mids[param] = value
299
+ end
316
300
 
317
301
  # Returns true if the parameter exists and false if it doesn't
318
302
  def exists?(param_name)
319
- return !@params[param_name].nil?
320
- end
321
-
303
+ return !@params[param_name].nil?
304
+ end
305
+
322
306
  def check_plugin_list_param(errors,param_name)
323
- # get plugin list
324
- pl_list=get_param(param_name)
307
+ # get plugin list
308
+ pl_list=get_param(param_name)
325
309
 
326
- # puts pl_list,param_name
327
- list=pl_list.split(',')
310
+ # puts pl_list,param_name
311
+ list=pl_list.split(',')
328
312
 
329
- list.map!{|e| e.strip}
313
+ list.map!{|e| e.strip}
330
314
 
331
- # puts "Lista:",list.join(',')
315
+ # puts "Lista:",list.join(',')
332
316
 
333
317
 
334
- # always the pluginExtractInserts at the end
335
- list.delete('PluginExtractInserts')
336
- list << 'PluginExtractInserts'
318
+ # always the pluginExtractInserts at the end
319
+ list.delete('PluginExtractInserts')
320
+ list << 'PluginExtractInserts'
337
321
 
338
- set_param(param_name,list.join(','))
339
- # if !list.include?('PluginExtractInserts')
340
- # raise "PluginExtractInserts do not exists"
341
- #
342
- # end
322
+ set_param(param_name,list.join(','))
323
+ # if !list.include?('PluginExtractInserts')
324
+ # raise "PluginExtractInserts do not exists"
325
+ #
326
+ # end
343
327
 
344
328
 
345
329
 
346
- end
330
+ end
347
331
 
348
332
  # def split_databases(db_param_name)
349
333
  def check_db_param(errors,db_param_name)
350
- # expand database paths
351
- dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
352
- # puts "ALGO"*20
353
- # puts "INPUT DATABASES:\n"+dbs.join(',')
354
-
355
- procesed_dbs=[]
356
- #
357
- # TODO - chequear aqui que la db no esta vacia y que esta formateada.
358
- dbs.reverse_each {|db_p|
359
- db=File.expand_path(db_p)
360
-
361
- if !File.exists?(db)
362
- path=File.join($FORMATTED_DB_PATH,db_p)
363
- else
364
- path=db
365
- end
366
-
367
-
368
- if Dir.glob(path+'*.n*').entries.empty?
369
- puts "DB file #{path} not formatted"
370
-
371
- if File.writable_real?(path)
372
- cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
373
- system(cmd)
374
- else
375
- raise "Can't format database. We don't have write permissions in: #{path}"
376
- end
377
- end
378
-
379
- procesed_dbs << path
380
-
381
- if !File.exists?(path)
382
- raise "DB File #{path} does not exists"
383
- # exit
384
- end
385
- }
386
-
387
- db_paths = '"'+procesed_dbs.join(' ')+'"'
388
-
389
- set_param(db_param_name,db_paths)
390
-
391
- puts "USED DATABASES\n"+db_paths
392
- end
334
+ if !get_param(db_param_name).empty?
335
+ # expand database paths
336
+ dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
337
+ # puts "ALGO"*20
338
+ # puts "INPUT DATABASES:\n"+dbs.join(',')
339
+
340
+ procesed_dbs=[]
341
+ #
342
+ # TODO - chequear aqui que la db no esta vacia y que esta formateada.
343
+ dbs.reverse_each {|db_p|
344
+ db=File.expand_path(db_p)
345
+
346
+ if !File.exists?(db)
347
+ path=File.join($FORMATTED_DB_PATH,db_p)
348
+ else
349
+ path=db
350
+ end
351
+
352
+
353
+ if Dir.glob(path+'*.n*').entries.empty?
354
+ puts "DB file #{path} not formatted"
355
+
356
+ if File.writable_real?(path)
357
+ cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
358
+ system(cmd)
359
+ else
360
+ raise "Can't format database. We don't have write permissions in: #{path}"
361
+ end
362
+ end
363
+
364
+ procesed_dbs << path
365
+
366
+ if !File.exists?(path)
367
+ raise "DB File #{path} does not exists"
368
+ # exit
369
+ end
370
+ }
371
+
372
+ db_paths = '"'+procesed_dbs.join(' ')+'"'
373
+
374
+ set_param(db_param_name,db_paths)
375
+
376
+ puts "USED DATABASES\n"+db_paths
377
+ end
378
+ end
393
379
 
394
380
 
395
381
  def self.generate_sample_params
@@ -29,7 +29,6 @@ class PluginManager
29
29
 
30
30
  # keeps a list of rejected sequences
31
31
 
32
-
33
32
  rejected_seqs = []
34
33
 
35
34
  @plugin_names.each do |plugin_name|
@@ -51,11 +50,10 @@ class PluginManager
51
50
 
52
51
  # Creates an instance of the plugin class named in "plugin_name" and associates it with the sequences in 'running_seqs'
53
52
  plugin_class = Object.const_get(plugin_name)
54
- # puts "ANTES *************" + seq.to_text_seq_fasta
53
+
55
54
  plugin_execution=plugin_class.new(running_seqs,@params)
56
- #puts plugin_name+':'+ plugin_execution.stats.to_json
57
- running_seqs.stats[plugin_name] = plugin_execution.stats
58
55
 
56
+ running_seqs.stats[plugin_name] = plugin_execution.stats
59
57
 
60
58
  # puts running_seqs.stats.to_json
61
59
  plugin_execution=nil
@@ -175,22 +175,14 @@ class Seqtrim
175
175
  # it is the server part
176
176
  if !only_workers then
177
177
 
178
- sequence_reader = nil
179
178
  cd_hit_input_file = nil
180
179
 
181
180
  # TODO - FIX seqtrim to not iterate two times over input, so STDIN can be used
182
-
181
+ sequence_readers=[]
182
+
183
183
  # open sequence reader and expand input files paths
184
184
  if options[:fastq]
185
185
 
186
- if options[:fastq]=='-'
187
- seqs_path = STDIN
188
- else
189
- seqs_path = File.expand_path(options[:fastq])
190
- end
191
-
192
- cd_hit_input_file = seqs_path
193
-
194
186
  # choose fastq quality format
195
187
  format=:sanger
196
188
 
@@ -203,13 +195,23 @@ class Seqtrim
203
195
  format = :sanger
204
196
  end
205
197
 
198
+ seqs_path=''
199
+
206
200
  $LOG.info("Used FastQ format for input files: #{format}")
201
+ # iterate files
202
+ options[:fastq].each do |fastq_file|
203
+
204
+ if fastq_file=='-'
205
+ seqs_path = STDIN
206
+ else
207
+ seqs_path = File.expand_path(fastq_file)
208
+ end
209
+
210
+ sequence_readers << FastqFile.new(seqs_path,'r',format, true)
211
+
212
+ end
207
213
 
208
- sequence_reader = FastqFile.new(seqs_path,'r',format, true)
209
- # cd_hit_input_file = 'cd-hit-input.fasta'
210
214
  cd_hit_input_file = seqs_path
211
- # $LOG.info "Converting input file for cd-hit-454"
212
- # $LOG.info "Conversion done"
213
215
 
214
216
  else
215
217
 
@@ -217,7 +219,7 @@ class Seqtrim
217
219
  cd_hit_input_file = seqs_path
218
220
 
219
221
  qual_path = File.expand_path(options[:qual]) if qual_path
220
- sequence_reader = FastaQualFile.new(options[:fasta],options[:qual],true)
222
+ sequence_readers << FastaQualFile.new(options[:fasta],options[:qual],true)
221
223
 
222
224
  end
223
225
 
@@ -250,12 +252,11 @@ class Seqtrim
250
252
  $LOG.error "Plugin check failed"
251
253
 
252
254
  # save used params to file
253
- params.save_file('used_params.txt')
255
+ params.save_file('used_params.txt')
254
256
 
255
257
  exit
256
258
  end
257
259
 
258
-
259
260
  if !Dir.exists?(OUTPUT_PATH)
260
261
  Dir.mkdir(OUTPUT_PATH)
261
262
  end
@@ -263,8 +264,7 @@ class Seqtrim
263
264
  # Extract global stats
264
265
  if params.get_param('generate_initial_stats')=='true'
265
266
  $LOG.info "Calculatings stats"
266
-
267
- ExtractStats.new(sequence_reader,params)
267
+ ExtractStats.new(sequence_readers,params)
268
268
  else
269
269
  $LOG.info "Skipping calculatings stats phase."
270
270
  end
@@ -274,14 +274,6 @@ class Seqtrim
274
274
  params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
275
275
 
276
276
  piro_on = (params.get_param('next_generation_sequences')=='true')
277
-
278
- # format blast database with truncated file
279
- #MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
280
-
281
- # leer mids
282
- # params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
283
- # params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
284
- # params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
285
277
 
286
278
  params.load_mids(params.get_param('mids_db'))
287
279
  params.load_ab_adapters(params.get_param('adapters_ab_db'))
@@ -334,7 +326,7 @@ class Seqtrim
334
326
  else
335
327
  $LOG.info 'Starting server'
336
328
 
337
- SeqtrimWorkManager.init_work_manager(sequence_reader, params,chunk_size,use_json,options[:skip_output])
329
+ SeqtrimWorkManager.init_work_manager(sequence_readers, params,chunk_size,use_json,options[:skip_output])
338
330
 
339
331
  begin
340
332
  cpus=1
@@ -371,7 +363,10 @@ class Seqtrim
371
363
  server.start_server
372
364
 
373
365
  # close sequence reader
374
- sequence_reader.close
366
+ sequence_readers.each do |file|
367
+ file.close
368
+ end
369
+
375
370
  $LOG.info 'Closing server'
376
371
  end
377
372
 
@@ -20,8 +20,8 @@ class Sequence
20
20
 
21
21
  @seq_rejected_by_message=''
22
22
 
23
- @ns_present = ns_present?
24
- @xs_present = xs_present?
23
+ @ns_present = ns_present?
24
+ @xs_present = xs_present?
25
25
 
26
26
 
27
27
 
@@ -10,6 +10,7 @@ class SequenceGroup
10
10
  @seqs=seqs
11
11
  @output_text={}
12
12
  @output_files={}
13
+
13
14
  end
14
15
 
15
16
 
@@ -31,6 +32,13 @@ class SequenceGroup
31
32
  yield seq
32
33
  end
33
34
  end
35
+
36
+ def each_slice(n)
37
+ @seqs.each_slice(n) do |seqs|
38
+ yield seqs
39
+ end
40
+ end
41
+
34
42
 
35
43
  def each_with_index
36
44
  @seqs.each_with_index do |seq,i|
@@ -46,7 +54,19 @@ class SequenceGroup
46
54
  end
47
55
 
48
56
  def add(array)
49
- @seqs += array
57
+ @seqs = @seqs + array
58
+
59
+ # sort by tuple_id and order in tuple
60
+ @seqs.sort! do |a,b|
61
+ comp = (a.tuple_id <=> b.tuple_id)
62
+ comp.zero? ? (a.order_in_tuple <=> b.order_in_tuple) : comp
63
+ end
64
+
65
+ # print
66
+ # @seqs.each do |s|
67
+ # puts "TID:#{s.tuple_id}, OIT: #{s.order_in_tuple}"
68
+ # end
69
+
50
70
  end
51
71
 
52
72
  def count
@@ -14,14 +14,13 @@ include Term::ANSIColor
14
14
  class SequenceWithAction < Sequence
15
15
  SHOW_QUAL = false
16
16
  SHOW_FINAL_INSERTS=true
17
- attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last
17
+
18
+ attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last, :order_in_tuple, :tuple_id, :tuple_size
18
19
 
19
20
  # Creates an instance with the structure needed to store the actions associated with a sequence
20
21
  def initialize(seq_name,seq_fasta,seq_qual, seq_comment = '')
21
22
  super
22
- #Tried
23
- #if @ns_present then $LOG.debug "The sequence #{seq_name} has N's" else $LOG.debug "The sequence #{seq_name} hasn't N's" end
24
- #if @xs_present then $LOG.debug "The sequence #{seq_name} has X's" else $LOG.debug "The sequence #{seq_name} hasn't X's" end
23
+
25
24
  @actions = []
26
25
  @seq_fasta_orig = seq_fasta
27
26
  @seq_fasta = seq_fasta
@@ -32,18 +31,31 @@ class SequenceWithAction < Sequence
32
31
  @insert_start = 0
33
32
  @insert_end = seq_fasta.length-1
34
33
 
35
-
36
- #@seq_qual = seq_qual
37
-
38
34
  @stats={}
35
+ @comments=[]
39
36
 
40
37
  @file_tags=[]
41
38
 
39
+ # for paired ends
40
+ @order_in_tuple=0
41
+ @tuple_id=0
42
+ @tuple_size=0
43
+ @file_tag_tuple_priority=0
44
+
45
+ end
46
+
47
+ def add_comment(comment)
48
+ @comments.push comment
49
+ end
50
+
51
+ def get_comment_line
52
+ return ([@seq_rejected_by_message]+@comments).compact.join(';')
42
53
  end
43
54
 
44
55
  # add a file tag to sequence
45
- def add_file_tag(tag_level, tag_value, tag_type)
56
+ def add_file_tag(tag_level, tag_value, tag_type, priority=0)
46
57
  @file_tags<< {:level => tag_level, :name => tag_value, :type=> tag_type}
58
+ @file_tag_tuple_priority=priority
47
59
  end
48
60
 
49
61
  # join file tags into a path
@@ -71,7 +83,7 @@ class SequenceWithAction < Sequence
71
83
 
72
84
  # puts "#{dirname}, #{filename}"
73
85
 
74
- return [dirname,filename]
86
+ return [dirname,filename,@file_tag_tuple_priority]
75
87
 
76
88
  end
77
89
 
@@ -328,16 +340,16 @@ class SequenceWithAction < Sequence
328
340
  output_res=[]
329
341
 
330
342
  if @seq_rejected
331
- output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
343
+ output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
332
344
  # puts @seq_name.bold + bold + ' REJECTED BECAUSE ' +@seq_rejected_by_message.bold if @seq_rejected
333
345
  else
334
- output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline
346
+ output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline
335
347
 
336
- end
348
+ end
337
349
 
338
350
  n=1
339
351
  withMessage = ["ActionIsContaminated","ActionVectors","ActionBadAdapter","ActionLeftAdapter","ActionRightAdapter"]
340
- color = red
352
+ color = red
341
353
 
342
354
  @actions.sort!{|e,f| e.start_pos<=>f.start_pos}.each do |a|
343
355
  a_type=a.action_type