seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginKey
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class PluginKey < Plugin
|
11
|
+
|
12
|
+
|
13
|
+
# Starts PluginKey's execution: marks where the key is located in each sequence of "seqs"
|
14
|
+
# Marks the key on every sequence of the batch.
#
# The 'use_independent_folder_for_each_key' parameter is read once per batch
# and cached in @group_by_key (true only when the parameter equals the string
# 'true'); exec_seq is then invoked for each sequence.
#
# seqs:: collection responding to +each+ that yields the sequences to process.
#
# Returns whatever +seqs.each+ returns.
def execute(seqs)
  @group_by_key = (@params.get_param('use_independent_folder_for_each_key') == 'true')

  seqs.each { |s| exec_seq(s) }
end
|
21
|
+
|
22
|
+
|
23
|
+
# Marks the key of a single sequence.
#
# The key is taken to be the first four bases (positions 0..3) of
# +seq.seq_fasta+. An 'ActionKey' action covering those positions is created
# through +seq.new_action+ and registered with +seq.add_actions+.
#
# When @group_by_key is set, the sequence is also tagged with a
# 'key_<KEY>' directory tag (KEY upcased) and a 'key_tag' stat is recorded.
# A 'key_size' stat is always recorded.
#
# seq:: sequence object; must respond to seq_name, seq_fasta, new_action,
#       add_actions and add_file_tag.
#
# Returns the result of the final +add_stats+ call.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"

  key_beg = 0
  key_end = 3
  key_size = key_end - key_beg + 1
  key = seq.seq_fasta[key_beg..key_end].upcase

  # adds the ActionKey to the sequence
  seq.add_actions([seq.new_action(key_beg, key_end, 'ActionKey')])

  if @group_by_key
    seq.add_file_tag(0, 'key_' + key, :dir)
    add_stats('key_tag', key)
  end

  add_stats('key_size', key_size)
end
|
54
|
+
|
55
|
+
# Returns an array with the errors caused by missing parameters
|
56
|
+
# Declares the parameters used by this plugin and collects validation errors.
#
# Registers 'use_independent_folder_for_each_key' (String, default 'false')
# through +params.check_param+, which receives the shared +errors+ array.
#
# params:: object responding to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the +errors+ array handed to +params.check_param+.
def self.check_params(params)
  errors = []

  comment = 'sequences containing with diferent keys (barcodes) are saved to separate folders'
  default_value = 'false'
  params.check_param(errors, 'use_independent_folder_for_each_key', 'String', default_value, comment)

  errors
end
|
68
|
+
|
69
|
+
|
70
|
+
end
|
@@ -0,0 +1,232 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginLinker
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
class PluginLinker < Plugin
|
10
|
+
MAX_LINKER_ERRORS=2
|
11
|
+
#-------------------------------------------------------------------------
|
12
|
+
#It's created an ActionInsert or ActionShortInsert before the ActionLinker
|
13
|
+
#Used: in class PluginLinker and PluginMid
|
14
|
+
#-------------------------------------------------------------------------
|
15
|
+
# def add_action_before_linker(p_q_beg,size_insert,actions,seq)
|
16
|
+
#
|
17
|
+
# size_min_insert = @params.get_param('size_min_insert').to_i
|
18
|
+
# if ((p_q_beg>0) && (size_insert>=size_min_insert)) #if linker's positions are right
|
19
|
+
# #It's created an ActionInsert or ActionShortInsert before the ActionLinker
|
20
|
+
# a = seq.new_action(0,p_q_beg-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
21
|
+
# actions.push a
|
22
|
+
# elsif (p_q_beg>0) #if linker's positions are right and insert's size is short
|
23
|
+
# #It's created an ActionShortInsert before the ActionLinker
|
24
|
+
# a = seq.new_action(0,p_q_beg-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
25
|
+
# actions.push a
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# end
|
29
|
+
|
30
|
+
#-------------------------------------------------------------------------
|
31
|
+
#It's created an ActionInsert or ActionShortInsert after the ActionLinker
|
32
|
+
#-------------------------------------------------------------------------
|
33
|
+
# def add_action_after_linker(p_q_end,size_insert,actions,seq)
|
34
|
+
#
|
35
|
+
# size_min_insert = @params.get_param('size_min_insert').to_i
|
36
|
+
#
|
37
|
+
# if ((p_q_end<seq.seq_fasta.size-1) && (size_insert>=size_min_insert) ) #if linker's positions are right
|
38
|
+
# #It's created an ActionInsert after the ActionLinker
|
39
|
+
# a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
40
|
+
#
|
41
|
+
# actions.push a
|
42
|
+
#
|
43
|
+
# elsif (p_q_end<seq.seq_fasta.size-1) #if linker's positions are right and insert's size is short
|
44
|
+
# #It's created an ActionInsert after the ActionLinker
|
45
|
+
# a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
46
|
+
#
|
47
|
+
# actions.push a
|
48
|
+
# end
|
49
|
+
#
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
|
53
|
+
# Adds up all the values of a quality array.
#
# a:: array of numeric quality values. +nil+ is accepted and counts as an
#     empty array, because Array#[] with a range returns nil when the range
#     starts past the end of the array.
#
# Returns the sum of the elements (0 for an empty or nil input).
def sum_quals(a)
  (a || []).inject(0) { |total, qual| total + qual }
end
|
60
|
+
|
61
|
+
# Collapses hits that cover exactly the same query interval.
#
# Two hits are considered duplicates when both their q_beg and their q_end
# are equal (==). Only the first hit of each interval is kept, and the
# original order is preserved.
#
# hits:: collection of hit objects responding to q_beg and q_end.
#
# Returns a new array with one hit per distinct (q_beg, q_end) pair.
def merge_hits_with_same_qbeg_and_qend(hits)
  hits.each_with_object([]) do |hit, unique|
    already_seen = unique.any? { |kept| kept.q_beg == hit.q_beg && kept.q_end == hit.q_end }
    unique.push(hit) unless already_seen
  end
end
|
72
|
+
|
73
|
+
# Starts PluginLinker's execution: looks for linkers in each sequence of "seqs"
|
74
|
+
# Looks for linkers in a batch of sequences.
#
# All the sequences are blasted together with do_blasts; each sequence is
# then processed by exec_seq together with the query result found at the same
# index in +blasts.querys+.
#
# seqs:: collection responding to +each_with_index+ that yields the sequences.
#
# Returns whatever +seqs.each_with_index+ returns.
def execute(seqs)
  blasts = do_blasts(seqs)

  seqs.each_with_index { |s, i| exec_seq(s, blasts.querys[i]) }
end
|
81
|
+
|
82
|
+
# Runs a single BLAST of all the sequences against the linkers database.
#
# The database is 'linkers.fasta' inside $FORMATTED_DB_PATH. BLAST is run as
# 'blastn' with the blastn-short task, using the 'blast_evalue_linkers' and
# 'blast_percent_linkers' parameters as e-value and identity cut-offs. The
# resulting command is written to $LOG.
#
# seqs:: collection of sequences responding to seq_name and seq_fasta.
#
# Returns the value returned by +BatchBlast#do_blast+.
def do_blasts(seqs)
  linkers_db = File.join($FORMATTED_DB_PATH, 'linkers.fasta')
  blast_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}"
  blast = BatchBlast.new("-db #{linkers_db}", 'blastn', blast_options) # get linkers

  $LOG.info('BLAST:' + blast.get_blast_cmd)

  # FASTA input is passed as an array of lines: header, sequence, header, ...
  fastas = seqs.flat_map { |seq| ['>' + seq.seq_name, seq.seq_fasta] }

  blast.do_blast(fastas)
end
|
103
|
+
|
104
|
+
|
105
|
+
# Annotates one sequence with the linker hits found by BLAST.
#
# Behaviour, depending on the hits left after merging those that share the
# same query interval:
# * no hits: records a 'without_linker' stat (value 0).
# * one hit with more than MAX_LINKER_ERRORS gaps+mismatches: the sequence is
#   marked as rejected ('linker with mismatches') and stats are recorded.
# * one acceptable hit: an 'ActionLinker' covering the hit is added.
# * several hits: a single 'ActionMultipleLinker' spanning from the smallest
#   q_beg to the largest q_end is added. The summed qualities on each side of
#   that span are compared: when the left side is at least as good,
#   +right_action+ is set on the action, otherwise +left_action+.
#   NOTE(review): presumably the flag selects the side to discard - confirm
#   against ActionMultipleLinker.
#
# seq::         sequence object (seq_name, seq_qual, new_action, add_actions,
#               seq_rejected=, seq_rejected_by_message=).
# blast_query:: BLAST result for this sequence (query_id, hits).
#
# Raises RuntimeError when blast_query.query_id differs from seq.seq_name.
# Returns the result of +seq.add_actions+, or nil when no action was created.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"

  actions = []

  if blast_query.hits.empty? # no linker found
    add_stats('without_linker', 0)
  else
    linkers = merge_hits_with_same_qbeg_and_qend(blast_query.hits)

    if linkers.count == 1
      linker = linkers.first
      linker_errors = linker.gaps + linker.mismatches

      if linker_errors > MAX_LINKER_ERRORS
        # too many gaps and mismatches: the whole sequence is rejected
        seq.seq_rejected = true
        seq.seq_rejected_by_message = 'linker with mismatches'
        add_stats('rejected', 'by_linker_errors')
        add_stats('linker_errors', linker_errors)
      else
        a = seq.new_action(linker.q_beg, linker.q_end, 'ActionLinker')
        a.message = linker.subject_id
        a.tag_id = linker.subject_id
        actions.push a

        add_stats('linker_id', linker.subject_id)
        add_stats('linker_id', 'total')
      end
    else # multiple linkers found
      linker_count = linkers.count
      first_linker = linkers.first
      span_beg = linkers.map { |l| l.q_beg }.min
      span_end = linkers.map { |l| l.q_end }.max

      a = seq.new_action(span_beg, span_end, 'ActionMultipleLinker')
      a.message = "#{linker_count} x #{first_linker.subject_id}"
      a.tag_id = first_linker.subject_id

      # determine which part (left or right) has the best quality
      sum_left = sum_quals(seq.seq_qual[0, span_beg])
      sum_right = sum_quals(seq.seq_qual[(span_end + 1)..seq.seq_qual.length])

      if sum_left >= sum_right
        a.right_action = true
      else
        a.left_action = true
      end

      actions.push a

      add_stats('multiple_linker_id', first_linker.subject_id)
      add_stats('multiple_linker_id', 'total')
      add_stats('multiple_linker_count', linker_count)
    end
  end

  seq.add_actions(actions) unless actions.empty?
end
|
213
|
+
|
214
|
+
|
215
|
+
# Returns an array with the errors caused by missing parameters
|
216
|
+
# Declares the parameters used by this plugin and collects validation errors.
#
# Registers, through +params.check_param+:
# * 'blast_evalue_linkers'  (Float, default 1e-10)
# * 'blast_percent_linkers' (Integer, default 95)
#
# params:: object responding to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the +errors+ array handed to +params.check_param+.
def self.check_params(params)
  errors = []

  comment = 'Blast E-value used as cut-off when searching for linkers in paired-ends'
  default_value = 1e-10
  params.check_param(errors, 'blast_evalue_linkers', 'Float', default_value, comment)

  comment = 'Minimum required identity (%) for a reliable linker'
  default_value = 95
  params.check_param(errors, 'blast_percent_linkers', 'Integer', default_value, comment)

  errors
end
|
230
|
+
|
231
|
+
|
232
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
########################################################
|
2
|
+
# Author: Almudena Bocinos Rioboo
|
3
|
+
#
|
4
|
+
# Defines the main methods that are necessary to execute PluginLowComplexity
|
5
|
+
|
6
|
+
#
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
require "plugin"
|
11
|
+
|
12
|
+
MIN_DUST_SIZE = 30
|
13
|
+
|
14
|
+
class PluginLowComplexity < Plugin
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
# Looks for low complexity regions in a batch of sequences.
#
# All the sequences are analysed together with do_dust; each sequence is then
# processed by exec_seq together with the result found at the same index.
#
# seqs:: collection responding to +each_with_index+ that yields the sequences.
#
# Returns whatever +seqs.each_with_index+ returns.
def execute(seqs)
  dust = do_dust(seqs)

  seqs.each_with_index { |s, i| exec_seq(s, dust[i]) }
end
|
25
|
+
|
26
|
+
# Runs DustMasker once over all the sequences of the batch.
#
# seqs:: collection of sequences responding to seq_name and seq_fasta.
#
# Returns the value returned by +DustMasker#do_dust+.
def do_dust(seqs)
  dust_masker = DustMasker.new

  # FASTA input is passed as an array of lines: header, sequence, header, ...
  fastas = seqs.flat_map { |seq| ['>' + seq.seq_name, seq.seq_fasta] }

  dust_masker.do_dust(fastas)
end
|
45
|
+
|
46
|
+
|
47
|
+
# Annotates one sequence with its low complexity (dust) regions.
#
# Every region reported in +dust_query.dust+ is read as a pair
# [start, stop]. Regions of at least MIN_DUST_SIZE positions become an
# 'ActionLowComplexity' action. When at least one action was created, the
# total size of ALL reported regions (including the short ones) is recorded
# in the 'low_complexity' stat and the actions are added to the sequence.
#
# seq::        sequence object (seq_name, new_action, add_actions).
# dust_query:: dust result for this sequence (query_id, dust), or nil when
#              there is no result, in which case nothing is done.
#
# Raises RuntimeError when dust_query.query_id differs from seq.seq_name.
# Returns the result of +seq.add_actions+, or nil when no action was created.
def exec_seq(seq, dust_query)
  # The nil check has to come before any use of dust_query.
  return if dust_query.nil?

  if dust_query.query_id != seq.seq_name
    raise "Dust and seq names do not match, dust:#{dust_query.query_id} sn:#{seq.seq_name}"
  end

  actions = []
  total_dust = 0

  dust_query.dust.each do |region|
    start = region[0]
    stop = region[1]
    dust_size = stop - start + 1
    total_dust += dust_size

    actions.push seq.new_action(start, stop, 'ActionLowComplexity') if dust_size >= MIN_DUST_SIZE
  end

  return if actions.empty?

  add_stats('low_complexity', total_dust)
  seq.add_actions(actions)
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
######################################################################
|
83
|
+
#---------------------------------------------------------------------
|
84
|
+
|
85
|
+
# Returns an array with the errors caused by missing parameters
|
86
|
+
# Collects parameter validation errors for this plugin.
#
# This plugin does not declare any parameter, so +params+ is not used.
#
# params:: parameters object (unused).
#
# Returns an empty array.
def self.check_params(params)
  []
end
|
96
|
+
|
97
|
+
|
98
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
########################################################
|
2
|
+
# Author: Almudena Bocinos Rioboo
|
3
|
+
#
|
4
|
+
# Defines the main methods that are necessary to execute PluginLowHighSize
|
5
|
+
|
6
|
+
#
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
require "plugin"
|
10
|
+
|
11
|
+
|
12
|
+
class PluginLowHighSize < Plugin
|
13
|
+
|
14
|
+
|
15
|
+
# Begins the plugin_low_high_size's execution with the sequence "seq"
|
16
|
+
|
17
|
+
# Checks the size of every sequence of the batch by calling exec_seq on each.
#
# seqs:: collection responding to +each+ that yields the sequences.
#
# Returns whatever +seqs.each+ returns.
def execute(seqs)
  seqs.each { |s| exec_seq(s) }
end
|
22
|
+
|
23
|
+
|
24
|
+
# Rejects a sequence whose raw length is outside the configured limits.
#
# The limits are the 'min_sequence_size_raw' and 'max_sequence_size_raw'
# parameters, converted with to_i. A sequence shorter than the minimum or
# longer than the maximum gets an 'ActionLowHighSize' action, is marked as
# rejected ('size out of limits') and its length is recorded in the
# 'rejected_seqs' stat. Sequences within the limits are left untouched.
#
# NOTE(review): check_params in this class only declares
# 'min_sequence_size_raw'; 'max_sequence_size_raw' is presumably set
# elsewhere - if it were missing, nil.to_i would give a maximum of 0.
#
# seq:: sequence object (seq_name, seq_fasta, new_action, add_actions,
#       seq_rejected=, seq_rejected_by_message=).
#
# Returns the result of +seq.add_actions+, or nil when the size is accepted.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"

  min_size = @params.get_param('min_sequence_size_raw').to_i
  max_size = @params.get_param('max_sequence_size_raw').to_i
  seq_length = seq.seq_fasta.length

  return if seq_length.between?(min_size, max_size)

  $LOG.debug "#{seq.seq_name} rejected by size #{seq_length} "

  # NOTE(review): the action end is the length itself, not length-1 - confirm
  # whether an inclusive last index was intended.
  a = seq.new_action(0, seq_length, 'ActionLowHighSize')
  a.message = 'low or high size'
  seq.seq_rejected = true
  seq.seq_rejected_by_message = 'size out of limits'

  add_stats('rejected_seqs', seq_length)
  seq.add_actions([a])
end
|
49
|
+
|
50
|
+
######################################################################
|
51
|
+
#---------------------------------------------------------------------
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
######################################################################
|
56
|
+
#---------------------------------------------------------------------
|
57
|
+
|
58
|
+
# Returns an array with the errors caused by missing parameters
|
59
|
+
# Declares the parameters used by this plugin and collects validation errors.
#
# Registers 'min_sequence_size_raw' (Integer, default 40) through
# +params.check_param+. 'max_sequence_size_raw' is not declared here.
#
# params:: object responding to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the +errors+ array handed to +params.check_param+.
def self.check_params(params)
  errors = []

  comment = 'Minimum size for a raw input sequence to be analysed (shorter reads are directly rejected without further analysis)'
  default_value = 40
  params.check_param(errors, 'min_sequence_size_raw', 'Integer', default_value, comment)

  errors
end
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
end
|