seqtrimnext 2.0.51 → 2.0.52
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/Manifest.txt +3 -3
- data/README.rdoc +18 -3
- data/Rakefile +2 -1
- data/bin/parse_params.rb +5 -1
- data/bin/seqtrimnext +53 -21
- data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
- data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
- data/lib/seqtrimnext/classes/params.rb +109 -123
- data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
- data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
- data/lib/seqtrimnext/classes/sequence.rb +2 -2
- data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
- data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
- data/lib/seqtrimnext/plugins/plugin.rb +42 -12
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
- data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
- data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
- data/lib/seqtrimnext/templates/amplicons.txt +1 -8
- data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
- data/lib/seqtrimnext/templates/only_quality.txt +24 -0
- data/lib/seqtrimnext/templates/sanger.txt +25 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +20 -7
- data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -15,23 +15,48 @@ class Plugin
|
|
15
15
|
attr_accessor :stats
|
16
16
|
|
17
17
|
#Loads the plugin's execution with the sequence "seq"
|
18
|
-
def initialize(seq,params)
|
19
|
-
# $LOG.debug self.class.to_s + " processing sequence: " + seq.seq_name
|
20
|
-
# if (!(self.class.to_s=='PluginLowQuality') )
|
18
|
+
def initialize(seq, params)
|
21
19
|
@params = params
|
22
20
|
@stats ={}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
|
22
|
+
if can_execute?
|
23
|
+
execute(seq)
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
def can_execute?
|
29
|
+
return true
|
28
30
|
end
|
29
31
|
|
30
32
|
#Begins the plugin's execution with the sequence "seq"
|
31
33
|
def execute(seqs)
|
34
|
+
blasts=do_blasts(seqs)
|
35
|
+
|
36
|
+
if !blasts.empty?
|
37
|
+
|
38
|
+
if blasts.is_a?(Array)
|
39
|
+
queries=blasts
|
40
|
+
else
|
41
|
+
queries = blasts.querys
|
42
|
+
end
|
43
|
+
|
44
|
+
seqs.each_with_index do |s,i|
|
45
|
+
exec_seq(s,queries[i])
|
46
|
+
end
|
47
|
+
|
48
|
+
else # there is no blast
|
32
49
|
|
50
|
+
seqs.each do |s|
|
51
|
+
exec_seq(s,nil)
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
33
55
|
end
|
34
56
|
|
57
|
+
def do_blasts(seqs)
|
58
|
+
return []
|
59
|
+
end
|
35
60
|
|
36
61
|
#Initializes the structure stats to the given key and value, only when it is necessary, and increases its counter
|
37
62
|
def add_stats(key,value)
|
@@ -69,14 +94,19 @@ class Plugin
|
|
69
94
|
return ((r1_start<=r2_end+1) and (r1_end>=r2_start-1) )
|
70
95
|
end
|
71
96
|
|
72
|
-
def merge_hits(hits,merged_hits,merged_ids=nil)
|
97
|
+
def merge_hits(hits,merged_hits,merged_ids=nil, merge_different_ids=true)
|
73
98
|
# puts " merging ============"
|
74
99
|
# hits.each do |hit|
|
75
100
|
hits.sort{|h1,h2| (h1.q_end-h1.q_beg+1)<=>(h2.q_end-h2.q_beg+1)}.reverse_each do |hit|
|
76
101
|
|
77
102
|
merged_ids.push hit.definition if !merged_ids.nil? && (! merged_ids.include?(hit.definition))
|
78
103
|
# if new hit's position is already contained in hits, then ignore the new hit
|
79
|
-
|
104
|
+
if merge_different_ids
|
105
|
+
c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
|
106
|
+
else
|
107
|
+
# overlap with existent hit and same subject id
|
108
|
+
c=merged_hits.find{|c| (overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end) && (hit.subject_id==c.subject_id))}
|
109
|
+
end
|
80
110
|
# puts " c #{c.inspect}"
|
81
111
|
|
82
112
|
if (c.nil?)
|
@@ -89,10 +119,10 @@ class Plugin
|
|
89
119
|
|
90
120
|
# one is inside each other, just ignore
|
91
121
|
if ((hit.q_beg>=c.q_beg && hit.q_end <=c.q_end) || (c.q_beg>=hit.q_beg && c.q_end <= hit.q_end))
|
92
|
-
|
122
|
+
# puts "* #{hit.subject_id} inside #{c.subject_id}"
|
93
123
|
else
|
94
124
|
# merge with old contaminant
|
95
|
-
|
125
|
+
# puts "#{hit.subject_id} NOT inside #{c.subject_id}"
|
96
126
|
min=[c.q_beg,hit.q_beg].min
|
97
127
|
max=[c.q_end,hit.q_end].max
|
98
128
|
|
@@ -13,16 +13,9 @@ class PluginAbAdapters < Plugin
|
|
13
13
|
MIN_ADAPTER_SIZE = 5
|
14
14
|
MIN_FAR_ADAPTER_SIZE = 13
|
15
15
|
MIN_LEFT_ADAPTER_SIZE = 9
|
16
|
-
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
17
|
-
def execute(seqs)
|
18
|
-
blasts= do_blasts(seqs)
|
19
|
-
|
20
|
-
seqs.each_with_index do |s,i|
|
21
|
-
exec_seq(s,blasts.querys[i])
|
22
|
-
end
|
23
|
-
end
|
24
16
|
|
25
17
|
def do_blasts(seqs)
|
18
|
+
|
26
19
|
# find MIDS with less results than max_target_seqs value
|
27
20
|
blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
|
28
21
|
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
@@ -12,15 +12,6 @@ class PluginAdapters < Plugin
|
|
12
12
|
# adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
|
13
13
|
MIN_ADAPTER_SIZE = 5
|
14
14
|
MIN_FAR_ADAPTER_SIZE = 13
|
15
|
-
# MIN_LEFT_ADAPTER_SIZE = 9
|
16
|
-
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
17
|
-
def execute(seqs)
|
18
|
-
blasts= do_blasts(seqs)
|
19
|
-
|
20
|
-
seqs.each_with_index do |s,i|
|
21
|
-
exec_seq(s,blasts.querys[i])
|
22
|
-
end
|
23
|
-
end
|
24
15
|
|
25
16
|
def do_blasts(seqs)
|
26
17
|
# find MIDS with less results than max_target_seqs value
|
@@ -9,18 +9,6 @@ require "plugin"
|
|
9
9
|
|
10
10
|
class PluginAmplicons < Plugin
|
11
11
|
|
12
|
-
# adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
|
13
|
-
# MIN_PRIMER_SIZE = 5
|
14
|
-
# MIN_FAR_ADAPTER_SIZE = 13
|
15
|
-
# MIN_LEFT_ADAPTER_SIZE = 9
|
16
|
-
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
17
|
-
def execute(seqs)
|
18
|
-
blasts= do_blasts(seqs)
|
19
|
-
|
20
|
-
seqs.each_with_index do |s,i|
|
21
|
-
exec_seq(s,blasts.querys[i])
|
22
|
-
end
|
23
|
-
end
|
24
12
|
|
25
13
|
def do_blasts(seqs)
|
26
14
|
# find MIDS with less results than max_target_seqs value
|
@@ -18,14 +18,6 @@ class PluginContaminants < Plugin
|
|
18
18
|
max_to_extreme=(min_cont_size/2).to_i
|
19
19
|
return ((c.q_beg-max_to_extreme<0) || (( c.q_end+max_to_extreme)>=seq.seq_fasta.size-1) ) #return if vector is very near to the extremes of insert)
|
20
20
|
end
|
21
|
-
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
22
|
-
def execute(seqs)
|
23
|
-
blasts= do_blasts(seqs)
|
24
|
-
|
25
|
-
seqs.each_with_index do |s,i|
|
26
|
-
exec_seq(s,blasts.querys[i])
|
27
|
-
end
|
28
|
-
end
|
29
21
|
|
30
22
|
def do_blasts(seqs)
|
31
23
|
# find MIDS with less results than max_target_seqs value
|
@@ -63,6 +55,11 @@ class PluginContaminants < Plugin
|
|
63
55
|
|
64
56
|
|
65
57
|
def exec_seq(seq,blast_query)
|
58
|
+
|
59
|
+
if seq.get_actions(ActionUserContaminant)
|
60
|
+
return
|
61
|
+
end
|
62
|
+
|
66
63
|
if blast_query.query_id != seq.seq_name
|
67
64
|
# raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
68
65
|
end
|
@@ -276,16 +276,7 @@ class PluginExtractInserts < Plugin
|
|
276
276
|
return sub_inserts
|
277
277
|
end
|
278
278
|
|
279
|
-
|
280
|
-
# Begins the plugin1's execution to warn that there is linker into the sequence
|
281
|
-
def execute(seqs)
|
282
|
-
seqs.each do |s|
|
283
|
-
exec_seq(s)
|
284
|
-
end
|
285
|
-
end
|
286
|
-
|
287
|
-
|
288
|
-
def exec_seq(seq)
|
279
|
+
def exec_seq(seq,blast_query)
|
289
280
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
|
290
281
|
|
291
282
|
# puts "INSERTO ANTES LINKER INSERT:"+seq.seq_fasta
|
@@ -232,9 +232,6 @@ class PluginFindPolyAt < Plugin
|
|
232
232
|
end
|
233
233
|
|
234
234
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
235
|
# if (poly['begin']<=MAX_POLY_T_FROM_LEFT*2)
|
239
236
|
# seq.seq_rejected=true
|
240
237
|
# seq.seq_rejected_by_message='polyT found'
|
@@ -311,14 +308,7 @@ class PluginFindPolyAt < Plugin
|
|
311
308
|
|
312
309
|
|
313
310
|
|
314
|
-
|
315
|
-
seqs.each do |s|
|
316
|
-
exec_seq(s)
|
317
|
-
end
|
318
|
-
end
|
319
|
-
|
320
|
-
|
321
|
-
def exec_seq(seq)
|
311
|
+
def exec_seq(seq,blast_query)
|
322
312
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
|
323
313
|
|
324
314
|
find_polyT(seq)
|
@@ -13,14 +13,8 @@ class PluginIgnoreRepeated < Plugin
|
|
13
13
|
SIZE_SEARCH_IN_IGNORE=15
|
14
14
|
|
15
15
|
#Begins the plugin1's execution to warn that there are repeated sequences, and disables all but one
|
16
|
-
def execute(seqs)
|
17
|
-
seqs.each do |s|
|
18
|
-
exec_seq(s)
|
19
|
-
end
|
20
|
-
end
|
16
|
+
def exec_seq(seq,blast_query)
|
21
17
|
|
22
|
-
|
23
|
-
def exec_seq(seq)
|
24
18
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
|
25
19
|
|
26
20
|
fasta_input=@params.get_param('truncated_input_file')
|
@@ -140,15 +140,8 @@ class PluginIndeterminations < Plugin
|
|
140
140
|
end
|
141
141
|
|
142
142
|
|
143
|
-
|
144
|
-
def execute(seqs)
|
145
|
-
seqs.each do |s|
|
146
|
-
exec_seq(s)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
143
|
+
def exec_seq(seq,blast_query)
|
150
144
|
|
151
|
-
def exec_seq(seq)
|
152
145
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
|
153
146
|
|
154
147
|
actions=[]
|
@@ -11,16 +11,8 @@ class PluginKey < Plugin
|
|
11
11
|
|
12
12
|
|
13
13
|
#Begins the pluginKey's execution to warn where is a key in the sequence "seq"
|
14
|
-
|
15
|
-
@group_by_key=(@params.get_param('use_independent_folder_for_each_key')=='true')
|
16
|
-
|
17
|
-
seqs.each do |s|
|
18
|
-
exec_seq(s)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
14
|
+
def exec_seq(seq,blast_query)
|
22
15
|
|
23
|
-
def exec_seq(seq)
|
24
16
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
|
25
17
|
# blast_table_results.inspect
|
26
18
|
|
@@ -70,15 +70,6 @@ class PluginLinker < Plugin
|
|
70
70
|
return res
|
71
71
|
end
|
72
72
|
|
73
|
-
# Begins the plugin1's execution to warn that there is linker into the sequence
|
74
|
-
def execute(seqs)
|
75
|
-
blasts= do_blasts(seqs)
|
76
|
-
|
77
|
-
seqs.each_with_index do |s,i|
|
78
|
-
exec_seq(s,blasts.querys[i])
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
73
|
def do_blasts(seqs)
|
83
74
|
# find MIDS with less results than max_target_seqs value
|
84
75
|
blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
|
@@ -12,19 +12,10 @@ require "plugin"
|
|
12
12
|
MIN_DUST_SIZE = 30
|
13
13
|
|
14
14
|
class PluginLowComplexity < Plugin
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
def execute(seqs)
|
19
|
-
dust= do_dust(seqs)
|
20
|
-
|
21
|
-
seqs.each_with_index do |s,i|
|
22
|
-
exec_seq(s,dust[i])
|
23
|
-
end
|
24
|
-
end
|
25
15
|
|
26
|
-
|
27
|
-
|
16
|
+
# do the dust masker instead of blast
|
17
|
+
def do_blasts(seqs)
|
18
|
+
|
28
19
|
dust_masker=DustMasker.new()
|
29
20
|
|
30
21
|
fastas=[]
|
@@ -44,7 +35,9 @@ class PluginLowComplexity < Plugin
|
|
44
35
|
end
|
45
36
|
|
46
37
|
|
47
|
-
def exec_seq(seq,dust_query)
|
38
|
+
def exec_seq(seq,blast_query)
|
39
|
+
dust_query=blast_query
|
40
|
+
|
48
41
|
if dust_query.query_id != seq.seq_name
|
49
42
|
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
50
43
|
end
|
@@ -65,14 +58,6 @@ class PluginLowComplexity < Plugin
|
|
65
58
|
if (dust_size)>=MIN_DUST_SIZE
|
66
59
|
|
67
60
|
# check if low complexity is inside a lowqual region
|
68
|
-
# in_low_qual=false
|
69
|
-
# low_quals.each do |lq|
|
70
|
-
# if lq.contains_action?(start,stop,0)
|
71
|
-
# in_low_qual=true
|
72
|
-
# break
|
73
|
-
# end
|
74
|
-
# end
|
75
|
-
|
76
61
|
if !seq.range_inside_action_type?(start,stop,ActionLowQuality)
|
77
62
|
|
78
63
|
total_dust+=dust_size
|
@@ -13,15 +13,9 @@ class PluginLowHighSize < Plugin
|
|
13
13
|
|
14
14
|
|
15
15
|
# Begins the plugin_low_high_size's execution with the sequence "seq"
|
16
|
-
|
17
|
-
def execute(seqs)
|
18
|
-
seqs.each do |s|
19
|
-
exec_seq(s)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
def exec_seq(seq)
|
16
|
+
|
17
|
+
def exec_seq(seq,blast_query)
|
18
|
+
|
25
19
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
|
26
20
|
|
27
21
|
min_size = @params.get_param('min_sequence_size_raw').to_i #min_size is: mean - 2dev
|
@@ -46,10 +40,6 @@ class PluginLowHighSize < Plugin
|
|
46
40
|
|
47
41
|
|
48
42
|
end
|
49
|
-
|
50
|
-
######################################################################
|
51
|
-
#---------------------------------------------------------------------
|
52
|
-
|
53
43
|
|
54
44
|
|
55
45
|
######################################################################
|