seqtrimnext 2.0.51 → 2.0.52

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -15,23 +15,48 @@ class Plugin
15
15
  attr_accessor :stats
16
16
 
17
17
  #Loads the plugin's execution with the sequence "seq"
18
- def initialize(seq,params)
19
- # $LOG.debug self.class.to_s + " processing sequence: " + seq.seq_name
20
- # if (!(self.class.to_s=='PluginLowQuality') )
18
+ def initialize(seq, params)
21
19
  @params = params
22
20
  @stats ={}
23
- execute(seq)
24
- # puts self.class.to_s + ' PPPPPPPPPP'
25
- # else
26
- # $LOG.error " Quality File haven't been provided. It's impossible to execute " +self.class.to_s + seq.seq_qual.nil?.to_s
27
- # end
21
+
22
+ if can_execute?
23
+ execute(seq)
24
+ end
25
+
26
+ end
27
+
28
+ def can_execute?
29
+ return true
28
30
  end
29
31
 
30
32
  #Begins the plugin's execution with the sequence "seq"
31
33
  def execute(seqs)
34
+ blasts=do_blasts(seqs)
35
+
36
+ if !blasts.empty?
37
+
38
+ if blasts.is_a?(Array)
39
+ queries=blasts
40
+ else
41
+ queries = blasts.querys
42
+ end
43
+
44
+ seqs.each_with_index do |s,i|
45
+ exec_seq(s,queries[i])
46
+ end
47
+
48
+ else # there is no blast
32
49
 
50
+ seqs.each do |s|
51
+ exec_seq(s,nil)
52
+ end
53
+
54
+ end
33
55
  end
34
56
 
57
+ def do_blasts(seqs)
58
+ return []
59
+ end
35
60
 
36
61
  #Initializes the structure stats to the given key and value, only when it is necessary, and increases its counter
37
62
  def add_stats(key,value)
@@ -69,14 +94,19 @@ class Plugin
69
94
  return ((r1_start<=r2_end+1) and (r1_end>=r2_start-1) )
70
95
  end
71
96
 
72
- def merge_hits(hits,merged_hits,merged_ids=nil)
97
+ def merge_hits(hits,merged_hits,merged_ids=nil, merge_different_ids=true)
73
98
  # puts " merging ============"
74
99
  # hits.each do |hit|
75
100
  hits.sort{|h1,h2| (h1.q_end-h1.q_beg+1)<=>(h2.q_end-h2.q_beg+1)}.reverse_each do |hit|
76
101
 
77
102
  merged_ids.push hit.definition if !merged_ids.nil? && (! merged_ids.include?(hit.definition))
78
103
  # if new hit's position is already contained in hits, then ignore the new hit
79
- c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
104
+ if merge_different_ids
105
+ c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
106
+ else
107
+ # overlap with existent hit and same subject id
108
+ c=merged_hits.find{|c| (overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end) && (hit.subject_id==c.subject_id))}
109
+ end
80
110
  # puts " c #{c.inspect}"
81
111
 
82
112
  if (c.nil?)
@@ -89,10 +119,10 @@ class Plugin
89
119
 
90
120
  # one is inside each other, just ignore
91
121
  if ((hit.q_beg>=c.q_beg && hit.q_end <=c.q_end) || (c.q_beg>=hit.q_beg && c.q_end <= hit.q_end))
92
- # puts "* #{hit.subject_id} inside #{c.subject_id}"
122
+ # puts "* #{hit.subject_id} inside #{c.subject_id}"
93
123
  else
94
124
  # merge with old contaminant
95
- # puts "#{hit.subject_id} NOT inside #{c.subject_id}"
125
+ # puts "#{hit.subject_id} NOT inside #{c.subject_id}"
96
126
  min=[c.q_beg,hit.q_beg].min
97
127
  max=[c.q_end,hit.q_end].max
98
128
 
@@ -13,16 +13,9 @@ class PluginAbAdapters < Plugin
13
13
  MIN_ADAPTER_SIZE = 5
14
14
  MIN_FAR_ADAPTER_SIZE = 13
15
15
  MIN_LEFT_ADAPTER_SIZE = 9
16
- #Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
17
- def execute(seqs)
18
- blasts= do_blasts(seqs)
19
-
20
- seqs.each_with_index do |s,i|
21
- exec_seq(s,blasts.querys[i])
22
- end
23
- end
24
16
 
25
17
  def do_blasts(seqs)
18
+
26
19
  # find MIDS with less results than max_target_seqs value
27
20
  blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
28
21
  $LOG.debug('BLAST:'+blast.get_blast_cmd)
@@ -12,15 +12,6 @@ class PluginAdapters < Plugin
12
12
  # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
13
  MIN_ADAPTER_SIZE = 5
14
14
  MIN_FAR_ADAPTER_SIZE = 13
15
- # MIN_LEFT_ADAPTER_SIZE = 9
16
- #Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
17
- def execute(seqs)
18
- blasts= do_blasts(seqs)
19
-
20
- seqs.each_with_index do |s,i|
21
- exec_seq(s,blasts.querys[i])
22
- end
23
- end
24
15
 
25
16
  def do_blasts(seqs)
26
17
  # find MIDS with less results than max_target_seqs value
@@ -9,18 +9,6 @@ require "plugin"
9
9
 
10
10
  class PluginAmplicons < Plugin
11
11
 
12
- # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
- # MIN_PRIMER_SIZE = 5
14
- # MIN_FAR_ADAPTER_SIZE = 13
15
- # MIN_LEFT_ADAPTER_SIZE = 9
16
- #Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
17
- def execute(seqs)
18
- blasts= do_blasts(seqs)
19
-
20
- seqs.each_with_index do |s,i|
21
- exec_seq(s,blasts.querys[i])
22
- end
23
- end
24
12
 
25
13
  def do_blasts(seqs)
26
14
  # find MIDS with less results than max_target_seqs value
@@ -18,14 +18,6 @@ class PluginContaminants < Plugin
18
18
  max_to_extreme=(min_cont_size/2).to_i
19
19
  return ((c.q_beg-max_to_extreme<0) || (( c.q_end+max_to_extreme)>=seq.seq_fasta.size-1) ) #return if vector is very near to the extremes of insert)
20
20
  end
21
- #Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
22
- def execute(seqs)
23
- blasts= do_blasts(seqs)
24
-
25
- seqs.each_with_index do |s,i|
26
- exec_seq(s,blasts.querys[i])
27
- end
28
- end
29
21
 
30
22
  def do_blasts(seqs)
31
23
  # find MIDS with less results than max_target_seqs value
@@ -63,6 +55,11 @@ class PluginContaminants < Plugin
63
55
 
64
56
 
65
57
  def exec_seq(seq,blast_query)
58
+
59
+ if seq.get_actions(ActionUserContaminant)
60
+ return
61
+ end
62
+
66
63
  if blast_query.query_id != seq.seq_name
67
64
  # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
68
65
  end
@@ -276,16 +276,7 @@ class PluginExtractInserts < Plugin
276
276
  return sub_inserts
277
277
  end
278
278
 
279
-
280
- # Begins the plugin1's execution to warn that there is linker into the sequence
281
- def execute(seqs)
282
- seqs.each do |s|
283
- exec_seq(s)
284
- end
285
- end
286
-
287
-
288
- def exec_seq(seq)
279
+ def exec_seq(seq,blast_query)
289
280
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
290
281
 
291
282
  # puts "INSERTO ANTES LINKER INSERT:"+seq.seq_fasta
@@ -232,9 +232,6 @@ class PluginFindPolyAt < Plugin
232
232
  end
233
233
 
234
234
 
235
-
236
-
237
-
238
235
  # if (poly['begin']<=MAX_POLY_T_FROM_LEFT*2)
239
236
  # seq.seq_rejected=true
240
237
  # seq.seq_rejected_by_message='polyT found'
@@ -311,14 +308,7 @@ class PluginFindPolyAt < Plugin
311
308
 
312
309
 
313
310
 
314
- def execute(seqs)
315
- seqs.each do |s|
316
- exec_seq(s)
317
- end
318
- end
319
-
320
-
321
- def exec_seq(seq)
311
+ def exec_seq(seq,blast_query)
322
312
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
323
313
 
324
314
  find_polyT(seq)
@@ -13,14 +13,8 @@ class PluginIgnoreRepeated < Plugin
13
13
  SIZE_SEARCH_IN_IGNORE=15
14
14
 
15
15
  #Begins the plugin1's execution to warn that there are repeated sequences, and disables all but one
16
- def execute(seqs)
17
- seqs.each do |s|
18
- exec_seq(s)
19
- end
20
- end
16
+ def exec_seq(seq,blast_query)
21
17
 
22
-
23
- def exec_seq(seq)
24
18
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
25
19
 
26
20
  fasta_input=@params.get_param('truncated_input_file')
@@ -140,15 +140,8 @@ class PluginIndeterminations < Plugin
140
140
  end
141
141
 
142
142
 
143
- #Begins the pluginKey's execution to warn where is a key in the sequence "seq"
144
- def execute(seqs)
145
- seqs.each do |s|
146
- exec_seq(s)
147
- end
148
- end
149
-
143
+ def exec_seq(seq,blast_query)
150
144
 
151
- def exec_seq(seq)
152
145
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
153
146
 
154
147
  actions=[]
@@ -11,16 +11,8 @@ class PluginKey < Plugin
11
11
 
12
12
 
13
13
  #Begins the pluginKey's execution to warn where is a key in the sequence "seq"
14
- def execute(seqs)
15
- @group_by_key=(@params.get_param('use_independent_folder_for_each_key')=='true')
16
-
17
- seqs.each do |s|
18
- exec_seq(s)
19
- end
20
- end
21
-
14
+ def exec_seq(seq,blast_query)
22
15
 
23
- def exec_seq(seq)
24
16
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
25
17
  # blast_table_results.inspect
26
18
 
@@ -70,15 +70,6 @@ class PluginLinker < Plugin
70
70
  return res
71
71
  end
72
72
 
73
- # Begins the plugin1's execution to warn that there is linker into the sequence
74
- def execute(seqs)
75
- blasts= do_blasts(seqs)
76
-
77
- seqs.each_with_index do |s,i|
78
- exec_seq(s,blasts.querys[i])
79
- end
80
- end
81
-
82
73
  def do_blasts(seqs)
83
74
  # find MIDS with less results than max_target_seqs value
84
75
  blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
@@ -12,19 +12,10 @@ require "plugin"
12
12
  MIN_DUST_SIZE = 30
13
13
 
14
14
  class PluginLowComplexity < Plugin
15
-
16
-
17
-
18
- def execute(seqs)
19
- dust= do_dust(seqs)
20
-
21
- seqs.each_with_index do |s,i|
22
- exec_seq(s,dust[i])
23
- end
24
- end
25
15
 
26
- def do_dust(seqs)
27
- # find MIDS with less results than max_target_seqs value
16
+ # do the dust masker instead of blast
17
+ def do_blasts(seqs)
18
+
28
19
  dust_masker=DustMasker.new()
29
20
 
30
21
  fastas=[]
@@ -44,7 +35,9 @@ class PluginLowComplexity < Plugin
44
35
  end
45
36
 
46
37
 
47
- def exec_seq(seq,dust_query)
38
+ def exec_seq(seq,blast_query)
39
+ dust_query=blast_query
40
+
48
41
  if dust_query.query_id != seq.seq_name
49
42
  raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
50
43
  end
@@ -65,14 +58,6 @@ class PluginLowComplexity < Plugin
65
58
  if (dust_size)>=MIN_DUST_SIZE
66
59
 
67
60
  # check if low complexity is inside a lowqual region
68
- # in_low_qual=false
69
- # low_quals.each do |lq|
70
- # if lq.contains_action?(start,stop,0)
71
- # in_low_qual=true
72
- # break
73
- # end
74
- # end
75
-
76
61
  if !seq.range_inside_action_type?(start,stop,ActionLowQuality)
77
62
 
78
63
  total_dust+=dust_size
@@ -13,15 +13,9 @@ class PluginLowHighSize < Plugin
13
13
 
14
14
 
15
15
  # Begins the plugin_low_high_size's execution with the sequence "seq"
16
-
17
- def execute(seqs)
18
- seqs.each do |s|
19
- exec_seq(s)
20
- end
21
- end
22
-
23
-
24
- def exec_seq(seq)
16
+
17
+ def exec_seq(seq,blast_query)
18
+
25
19
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
26
20
 
27
21
  min_size = @params.get_param('min_sequence_size_raw').to_i #min_size is: mean - 2dev
@@ -46,10 +40,6 @@ class PluginLowHighSize < Plugin
46
40
 
47
41
 
48
42
  end
49
-
50
- ######################################################################
51
- #---------------------------------------------------------------------
52
-
53
43
 
54
44
 
55
45
  ######################################################################