seqtrimnext 2.0.51 → 2.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -11,20 +11,20 @@ require "scbi_math"
11
11
 
12
12
  class ExtractStats
13
13
 
14
- def initialize(sequence_reader,params)
14
+ def initialize(sequence_readers,params)
15
15
 
16
16
  @sequence_lengths = [] #array of sequences lengths
17
17
  @length_frequency = [] #number of sequences of each size (frequency)
18
18
  @keys={} #found keys
19
19
  @params = params
20
- @use_qual=sequence_reader.with_qual?
20
+ @use_qual=sequence_readers.first.with_qual?
21
21
  # @params.get_param('use_qual')
22
22
 
23
23
  @totalnt=0
24
24
  @qv=[]
25
25
 
26
26
 
27
- @sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_reader)
27
+ @sequence_lengths_stats, @length_frequency_stats, @quality_stats = extract_stats_from_sequences(sequence_readers)
28
28
 
29
29
 
30
30
  set_params_and_results
@@ -37,30 +37,33 @@ class ExtractStats
37
37
 
38
38
  end
39
39
 
40
- def extract_stats_from_sequences(sequence_reader)
40
+ def extract_stats_from_sequences(sequence_readers)
41
+ sequence_readers.each do |sequence_reader|
41
42
 
42
- sequence_reader.each do |name_seq,fasta_seq,qual|
43
- l = fasta_seq.length
44
43
 
45
- @totalnt+=l
44
+ sequence_reader.each do |name_seq,fasta_seq,qual|
45
+ l = fasta_seq.length
46
46
 
47
- #save all lengths
48
- @sequence_lengths.push l
47
+ @totalnt+=l
49
48
 
50
- # add key value
51
- add_key(fasta_seq[0..3].upcase)
49
+ #save all lengths
50
+ @sequence_lengths.push l
52
51
 
53
- # add fasta length
54
- @length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
52
+ # add key value
53
+ add_key(fasta_seq[0..3].upcase)
55
54
 
56
- #extract qv values
57
- extract_qv_from_sequence(qual) if @use_qual
55
+ # add fasta length
56
+ @length_frequency[fasta_seq.length] = (@length_frequency[fasta_seq.length] || 1 ) + 1
58
57
 
59
- # print some progress info
60
- if (sequence_reader.num_seqs % 10000==0)
61
- puts "Calculating stats: #{sequence_reader.num_seqs}"
62
- end
58
+ #extract qv values
59
+ extract_qv_from_sequence(qual) if @use_qual
60
+
61
+ # print some progress info
62
+ if (sequence_reader.num_seqs % 10000==0)
63
+ puts "Calculating stats: #{sequence_reader.num_seqs}"
64
+ end
63
65
 
66
+ end
64
67
  end
65
68
 
66
69
  length_stats = ScbiNArray.to_na(@sequence_lengths)
@@ -74,9 +77,9 @@ class ExtractStats
74
77
 
75
78
  ## PLOT RESULTS
76
79
  if !File.exists?('graphs')
77
- Dir.mkdir('graphs')
80
+ Dir.mkdir('graphs')
78
81
  end
79
-
82
+
80
83
 
81
84
  x = []
82
85
  y = []
@@ -107,10 +110,10 @@ class ExtractStats
107
110
  def plot_qualities
108
111
 
109
112
  if !File.exists?('graphs')
110
- Dir.mkdir('graphs')
113
+ Dir.mkdir('graphs')
111
114
  end
112
115
  minimum_qual_value = @params.get_param('min_quality').to_i
113
-
116
+
114
117
  # get qualities values
115
118
  x=[]
116
119
  y=[]
@@ -127,7 +127,7 @@ class Params
127
127
 
128
128
  # line doesn't finish in *
129
129
  if (line[0]!='>'[0]) && (!(line =~ /\*$/))
130
-
130
+
131
131
  #puts line
132
132
  # puts line,line[0]
133
133
  if line =~ />([^\.]+)\.\.\.\s/
@@ -189,12 +189,12 @@ class Params
189
189
 
190
190
  def get_fasta(list,name,type)
191
191
  res = list[name]
192
-
192
+
193
193
  if res.nil?
194
194
  $LOG.error("Error. The #{type}: #{name} was not correctly loaded")
195
195
  raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
196
196
  end
197
-
197
+
198
198
  return res
199
199
  end
200
200
 
@@ -231,165 +231,151 @@ class Params
231
231
  line = Regexp.last_match[2].to_i
232
232
  method = Regexp.last_match[3]
233
233
  plugin=File.basename(file,File.extname(file))
234
-
235
- # puts "CALLER: #{plugin}"
236
- # puts [file, line, method]
237
234
 
238
235
  end
239
236
 
240
237
  end
241
238
 
242
- def set_param(param,value,comment=nil)
243
- plugin=get_plugin
239
+ def set_param(param,value,comment = nil)
240
+ plugin=get_plugin
244
241
 
245
- @params[param] = value
242
+ @params[param] = value
246
243
 
247
- if get_comment(plugin,param).nil?
248
- set_comment(plugin,param,comment)
249
- end
244
+ if get_comment(plugin,param).nil?
245
+ set_comment(plugin,param,comment)
246
+ end
250
247
 
251
248
 
252
- end
253
-
254
- # def set_order(plugin,param)
255
- #
256
- # if @param_order[plugin].nil?
257
- # @param_order[plugin]=[]
258
- # end
259
- #
260
- # if !@param_order[plugin].index(param)
261
- # @param_order[plugin].push param
262
- # end
263
- # end
264
-
249
+ end
265
250
 
266
251
  def get_comment(plugin,param)
267
- res = nil
268
- if @plugin_comments[plugin]
269
- res =@plugin_comments[plugin][param]
270
- end
271
- return res
272
- end
252
+ res = nil
253
+ if @plugin_comments[plugin]
254
+ res =@plugin_comments[plugin][param]
255
+ end
256
+ return res
257
+ end
273
258
 
274
259
 
275
260
  def set_comment(plugin,param,comment)
276
- if !comment.is_a?(Array) && !comment.nil?
277
- comment=comment.split("\n").compact.map{|l| l.strip}
278
- end
261
+ if !comment.is_a?(Array) && !comment.nil?
262
+ comment=comment.split("\n").compact.map{|l| l.strip}
263
+ end
279
264
 
280
- if @plugin_comments[plugin].nil?
281
- @plugin_comments[plugin]={}
282
- end
265
+ if @plugin_comments[plugin].nil?
266
+ @plugin_comments[plugin]={}
267
+ end
283
268
 
284
- old_comment=''
285
- # remove from other plugins
286
- @plugin_comments.each do |plugin_name,comments|
287
- if comments.keys.include?(param) && plugin_name!=plugin
288
- old_comment=comments[param]
289
- comments.delete(param)
290
- end
291
- end
269
+ old_comment=''
270
+ # remove from other plugins
271
+ @plugin_comments.each do |plugin_name,comments|
272
+ if comments.keys.include?(param) && plugin_name!=plugin
273
+ old_comment=comments[param]
274
+ comments.delete(param)
275
+ end
276
+ end
292
277
 
293
- if comment.nil?
294
- comment=old_comment
295
- end
278
+ if comment.nil?
279
+ comment=old_comment
280
+ end
296
281
 
297
- # @comments[param]=(comment || [''])
298
- @plugin_comments[plugin][param]=(comment || [''])
299
- # puts @plugin_comments.keys.to_json
282
+ # @comments[param]=(comment || [''])
283
+ @plugin_comments[plugin][param]=(comment || [''])
284
+ # puts @plugin_comments.keys.to_json
300
285
 
301
- # remove empty comments
286
+ # remove empty comments
302
287
 
303
- @plugin_comments.reverse_each do |plugin_name,comments|
304
- if comments.empty?
305
- @plugin_comments.delete(plugin_name)
306
- end
307
- end
288
+ @plugin_comments.reverse_each do |plugin_name,comments|
289
+ if comments.empty?
290
+ @plugin_comments.delete(plugin_name)
291
+ end
292
+ end
293
+
294
+ end
308
295
 
309
- end
310
-
311
296
 
312
297
  def set_mid(param,value)
313
- @mids[param] = value
314
- end
315
- #attr_accessor :h # to accede to the atribute 'h' from out of this class
298
+ @mids[param] = value
299
+ end
316
300
 
317
301
  # Returns true if exists the parameter and nil if don't
318
302
  def exists?(param_name)
319
- return !@params[param_name].nil?
320
- end
321
-
303
+ return !@params[param_name].nil?
304
+ end
305
+
322
306
  def check_plugin_list_param(errors,param_name)
323
- # get plugin list
324
- pl_list=get_param(param_name)
307
+ # get plugin list
308
+ pl_list=get_param(param_name)
325
309
 
326
- # puts pl_list,param_name
327
- list=pl_list.split(',')
310
+ # puts pl_list,param_name
311
+ list=pl_list.split(',')
328
312
 
329
- list.map!{|e| e.strip}
313
+ list.map!{|e| e.strip}
330
314
 
331
- # puts "Lista:",list.join(',')
315
+ # puts "Lista:",list.join(',')
332
316
 
333
317
 
334
- # always the pluginExtractInserts at the end
335
- list.delete('PluginExtractInserts')
336
- list << 'PluginExtractInserts'
318
+ # always the pluginExtractInserts at the end
319
+ list.delete('PluginExtractInserts')
320
+ list << 'PluginExtractInserts'
337
321
 
338
- set_param(param_name,list.join(','))
339
- # if !list.include?('PluginExtractInserts')
340
- # raise "PluginExtractInserts do not exists"
341
- #
342
- # end
322
+ set_param(param_name,list.join(','))
323
+ # if !list.include?('PluginExtractInserts')
324
+ # raise "PluginExtractInserts do not exists"
325
+ #
326
+ # end
343
327
 
344
328
 
345
329
 
346
- end
330
+ end
347
331
 
348
332
  # def split_databases(db_param_name)
349
333
  def check_db_param(errors,db_param_name)
350
- # expand database paths
351
- dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
352
- # puts "ALGO"*20
353
- # puts "INPUT DATABASES:\n"+dbs.join(',')
354
-
355
- procesed_dbs=[]
356
- #
357
- # TODO - chequear aqui que la db no esta vacia y que esta formateada.
358
- dbs.reverse_each {|db_p|
359
- db=File.expand_path(db_p)
360
-
361
- if !File.exists?(db)
362
- path=File.join($FORMATTED_DB_PATH,db_p)
363
- else
364
- path=db
365
- end
366
-
367
-
368
- if Dir.glob(path+'*.n*').entries.empty?
369
- puts "DB file #{path} not formatted"
370
-
371
- if File.writable_real?(path)
372
- cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
373
- system(cmd)
374
- else
375
- raise "Can't format database. We don't have write permissions in: #{path}"
376
- end
377
- end
378
-
379
- procesed_dbs << path
380
-
381
- if !File.exists?(path)
382
- raise "DB File #{path} does not exists"
383
- # exit
384
- end
385
- }
386
-
387
- db_paths = '"'+procesed_dbs.join(' ')+'"'
388
-
389
- set_param(db_param_name,db_paths)
390
-
391
- puts "USED DATABASES\n"+db_paths
392
- end
334
+ if !get_param(db_param_name).empty?
335
+ # expand database paths
336
+ dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
337
+ # puts "ALGO"*20
338
+ # puts "INPUT DATABASES:\n"+dbs.join(',')
339
+
340
+ procesed_dbs=[]
341
+ #
342
+ # TODO - chequear aqui que la db no esta vacia y que esta formateada.
343
+ dbs.reverse_each {|db_p|
344
+ db=File.expand_path(db_p)
345
+
346
+ if !File.exists?(db)
347
+ path=File.join($FORMATTED_DB_PATH,db_p)
348
+ else
349
+ path=db
350
+ end
351
+
352
+
353
+ if Dir.glob(path+'*.n*').entries.empty?
354
+ puts "DB file #{path} not formatted"
355
+
356
+ if File.writable_real?(path)
357
+ cmd = "makeblastdb -in #{path} -parse_seqids -dbtype nucl"
358
+ system(cmd)
359
+ else
360
+ raise "Can't format database. We don't have write permissions in: #{path}"
361
+ end
362
+ end
363
+
364
+ procesed_dbs << path
365
+
366
+ if !File.exists?(path)
367
+ raise "DB File #{path} does not exists"
368
+ # exit
369
+ end
370
+ }
371
+
372
+ db_paths = '"'+procesed_dbs.join(' ')+'"'
373
+
374
+ set_param(db_param_name,db_paths)
375
+
376
+ puts "USED DATABASES\n"+db_paths
377
+ end
378
+ end
393
379
 
394
380
 
395
381
  def self.generate_sample_params
@@ -29,7 +29,6 @@ class PluginManager
29
29
 
30
30
  # keeps a list of rejected sequences
31
31
 
32
-
33
32
  rejected_seqs = []
34
33
 
35
34
  @plugin_names.each do |plugin_name|
@@ -51,11 +50,10 @@ class PluginManager
51
50
 
52
51
  # Creates an instance of the respective plugin stored in "plugin_name",and asociate it to the sequence 'seq'
53
52
  plugin_class = Object.const_get(plugin_name)
54
- # puts "ANTES *************" + seq.to_text_seq_fasta
53
+
55
54
  plugin_execution=plugin_class.new(running_seqs,@params)
56
- #puts plugin_name+':'+ plugin_execution.stats.to_json
57
- running_seqs.stats[plugin_name] = plugin_execution.stats
58
55
 
56
+ running_seqs.stats[plugin_name] = plugin_execution.stats
59
57
 
60
58
  # puts running_seqs.stats.to_json
61
59
  plugin_execution=nil
@@ -175,22 +175,14 @@ class Seqtrim
175
175
  # it is the server part
176
176
  if !only_workers then
177
177
 
178
- sequence_reader = nil
179
178
  cd_hit_input_file = nil
180
179
 
181
180
  # TODO - FIX seqtrim to not iterate two times over input, so STDIN can be used
182
-
181
+ sequence_readers=[]
182
+
183
183
  # open sequence reader and expand input files paths
184
184
  if options[:fastq]
185
185
 
186
- if options[:fastq]=='-'
187
- seqs_path = STDIN
188
- else
189
- seqs_path = File.expand_path(options[:fastq])
190
- end
191
-
192
- cd_hit_input_file = seqs_path
193
-
194
186
  # choose fastq quality format
195
187
  format=:sanger
196
188
 
@@ -203,13 +195,23 @@ class Seqtrim
203
195
  format = :sanger
204
196
  end
205
197
 
198
+ seqs_path=''
199
+
206
200
  $LOG.info("Used FastQ format for input files: #{format}")
201
+ # iterate files
202
+ options[:fastq].each do |fastq_file|
203
+
204
+ if fastq_file=='-'
205
+ seqs_path = STDIN
206
+ else
207
+ seqs_path = File.expand_path(fastq_file)
208
+ end
209
+
210
+ sequence_readers << FastqFile.new(seqs_path,'r',format, true)
211
+
212
+ end
207
213
 
208
- sequence_reader = FastqFile.new(seqs_path,'r',format, true)
209
- # cd_hit_input_file = 'cd-hit-input.fasta'
210
214
  cd_hit_input_file = seqs_path
211
- # $LOG.info "Converting input file for cd-hit-454"
212
- # $LOG.info "Conversion done"
213
215
 
214
216
  else
215
217
 
@@ -217,7 +219,7 @@ class Seqtrim
217
219
  cd_hit_input_file = seqs_path
218
220
 
219
221
  qual_path = File.expand_path(options[:qual]) if qual_path
220
- sequence_reader = FastaQualFile.new(options[:fasta],options[:qual],true)
222
+ sequence_readers << FastaQualFile.new(options[:fasta],options[:qual],true)
221
223
 
222
224
  end
223
225
 
@@ -250,12 +252,11 @@ class Seqtrim
250
252
  $LOG.error "Plugin check failed"
251
253
 
252
254
  # save used params to file
253
- params.save_file('used_params.txt')
255
+ params.save_file('used_params.txt')
254
256
 
255
257
  exit
256
258
  end
257
259
 
258
-
259
260
  if !Dir.exists?(OUTPUT_PATH)
260
261
  Dir.mkdir(OUTPUT_PATH)
261
262
  end
@@ -263,8 +264,7 @@ class Seqtrim
263
264
  # Extract global stats
264
265
  if params.get_param('generate_initial_stats')=='true'
265
266
  $LOG.info "Calculatings stats"
266
-
267
- ExtractStats.new(sequence_reader,params)
267
+ ExtractStats.new(sequence_readers,params)
268
268
  else
269
269
  $LOG.info "Skipping calculatings stats phase."
270
270
  end
@@ -274,14 +274,6 @@ class Seqtrim
274
274
  params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
275
275
 
276
276
  piro_on = (params.get_param('next_generation_sequences')=='true')
277
-
278
- # format blast database with truncated file
279
- #MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
280
-
281
- # leer mids
282
- # params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
283
- # params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
284
- # params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
285
277
 
286
278
  params.load_mids(params.get_param('mids_db'))
287
279
  params.load_ab_adapters(params.get_param('adapters_ab_db'))
@@ -334,7 +326,7 @@ class Seqtrim
334
326
  else
335
327
  $LOG.info 'Starting server'
336
328
 
337
- SeqtrimWorkManager.init_work_manager(sequence_reader, params,chunk_size,use_json,options[:skip_output])
329
+ SeqtrimWorkManager.init_work_manager(sequence_readers, params,chunk_size,use_json,options[:skip_output])
338
330
 
339
331
  begin
340
332
  cpus=1
@@ -371,7 +363,10 @@ class Seqtrim
371
363
  server.start_server
372
364
 
373
365
  # close sequence reader
374
- sequence_reader.close
366
+ sequence_readers.each do |file|
367
+ file.close
368
+ end
369
+
375
370
  $LOG.info 'Closing server'
376
371
  end
377
372
 
@@ -20,8 +20,8 @@ class Sequence
20
20
 
21
21
  @seq_rejected_by_message=''
22
22
 
23
- @ns_present = ns_present?
24
- @xs_present = xs_present?
23
+ @ns_present = ns_present?
24
+ @xs_present = xs_present?
25
25
 
26
26
 
27
27
 
@@ -10,6 +10,7 @@ class SequenceGroup
10
10
  @seqs=seqs
11
11
  @output_text={}
12
12
  @output_files={}
13
+
13
14
  end
14
15
 
15
16
 
@@ -31,6 +32,13 @@ class SequenceGroup
31
32
  yield seq
32
33
  end
33
34
  end
35
+
36
+ def each_slice(n)
37
+ @seqs.each_slice(n) do |seqs|
38
+ yield seqs
39
+ end
40
+ end
41
+
34
42
 
35
43
  def each_with_index
36
44
  @seqs.each_with_index do |seq,i|
@@ -46,7 +54,19 @@ class SequenceGroup
46
54
  end
47
55
 
48
56
  def add(array)
49
- @seqs += array
57
+ @seqs = @seqs + array
58
+
59
+ # sort by tuple_id and order in tuple
60
+ @seqs.sort! do |a,b|
61
+ comp = (a.tuple_id <=> b.tuple_id)
62
+ comp.zero? ? (a.order_in_tuple <=> b.order_in_tuple) : comp
63
+ end
64
+
65
+ # print
66
+ # @seqs.each do |s|
67
+ # puts "TID:#{s.tuple_id}, OIT: #{s.order_in_tuple}"
68
+ # end
69
+
50
70
  end
51
71
 
52
72
  def count
@@ -14,14 +14,13 @@ include Term::ANSIColor
14
14
  class SequenceWithAction < Sequence
15
15
  SHOW_QUAL = false
16
16
  SHOW_FINAL_INSERTS=true
17
- attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last
17
+
18
+ attr_accessor :actions,:seq_fasta_orig, :seq_qual_orig ,:insert_start , :insert_end, :stats , :insert_start_last , :insert_end_last, :order_in_tuple, :tuple_id, :tuple_size
18
19
 
19
20
  # Creates an instance with the structure to storage the actions associated to a sequence
20
21
  def initialize(seq_name,seq_fasta,seq_qual, seq_comment = '')
21
22
  super
22
- #Tried
23
- #if @ns_present then $LOG.debug "The sequence #{seq_name} has N's" else $LOG.debug "The sequence #{seq_name} hasn't N's" end
24
- #if @xs_present then $LOG.debug "The sequence #{seq_name} has X's" else $LOG.debug "The sequence #{seq_name} hasn't X's" end
23
+
25
24
  @actions = []
26
25
  @seq_fasta_orig = seq_fasta
27
26
  @seq_fasta = seq_fasta
@@ -32,18 +31,31 @@ class SequenceWithAction < Sequence
32
31
  @insert_start = 0
33
32
  @insert_end = seq_fasta.length-1
34
33
 
35
-
36
- #@seq_qual = seq_qual
37
-
38
34
  @stats={}
35
+ @comments=[]
39
36
 
40
37
  @file_tags=[]
41
38
 
39
+ # for paired ends
40
+ @order_in_tuple=0
41
+ @tuple_id=0
42
+ @tuple_size=0
43
+ @file_tag_tuple_priority=0
44
+
45
+ end
46
+
47
+ def add_comment(comment)
48
+ @comments.push comment
49
+ end
50
+
51
+ def get_comment_line
52
+ return ([@seq_rejected_by_message]+@comments).compact.join(';')
42
53
  end
43
54
 
44
55
  # add a file tag to sequence
45
- def add_file_tag(tag_level, tag_value, tag_type)
56
+ def add_file_tag(tag_level, tag_value, tag_type, priority=0)
46
57
  @file_tags<< {:level => tag_level, :name => tag_value, :type=> tag_type}
58
+ @file_tag_tuple_priority=priority
47
59
  end
48
60
 
49
61
  # join file tags into a path
@@ -71,7 +83,7 @@ class SequenceWithAction < Sequence
71
83
 
72
84
  # puts "#{dirname}, #{filename}"
73
85
 
74
- return [dirname,filename]
86
+ return [dirname,filename,@file_tag_tuple_priority]
75
87
 
76
88
  end
77
89
 
@@ -328,16 +340,16 @@ class SequenceWithAction < Sequence
328
340
  output_res=[]
329
341
 
330
342
  if @seq_rejected
331
- output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
343
+ output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline + " REJECTED: #{@seq_rejected_by_message}".red
332
344
  # puts @seq_name.bold + bold + ' REJECTED BECAUSE ' +@seq_rejected_by_message.bold if @seq_rejected
333
345
  else
334
- output_res<< " Sequence #{seq_name} had the next actions: ".bold.underline
346
+ output_res<< "[#{@tuple_id},#{@order_in_tuple}] Sequence #{seq_name} had the next actions: ".bold.underline
335
347
 
336
- end
348
+ end
337
349
 
338
350
  n=1
339
351
  withMessage = ["ActionIsContaminated","ActionVectors","ActionBadAdapter","ActionLeftAdapter","ActionRightAdapter"]
340
- color = red
352
+ color = red
341
353
 
342
354
  @actions.sort!{|e,f| e.start_pos<=>f.start_pos}.each do |a|
343
355
  a_type=a.action_type