seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
@@ -0,0 +1,267 @@
|
|
1
|
+
########################################################
|
2
|
+
# Author: Almudena Bocinos Rioboo
|
3
|
+
#
|
4
|
+
# Defines the main methods that are necessary to execute a plugin
|
5
|
+
#
|
6
|
+
########################################################
|
7
|
+
|
8
|
+
require 'string_utils'
|
9
|
+
# $: << '/Users/dariogf/progs/ruby/gems/scbi_blast/lib'
|
10
|
+
|
11
|
+
require 'scbi_blast'
|
12
|
+
|
13
|
+
class Plugin
|
14
|
+
|
15
|
+
attr_accessor :stats
|
16
|
+
|
17
|
+
#Loads the plugin and runs it with the sequence "seq"
|
18
|
+
# Builds the plugin and runs it straight away.
#
# seq    - object passed unchanged to #execute.
# params - parameter container, kept in @params for the plugin's methods.
#
# @stats starts as an empty hash before #execute is called.
def initialize(seq, params)
  @params, @stats = params, {}
  execute(seq)
end
|
29
|
+
|
30
|
+
#Begins the plugin's execution with the sequences "seqs"
|
31
|
+
# Hook invoked by #initialize. The base implementation does nothing and
# returns nil.
#
# seqs - the object handed to the constructor.
def execute(seqs)
  nil
end
|
34
|
+
|
35
|
+
|
36
|
+
#Initializes the stats structure for the given key and value, only when necessary, and increases its counter
|
37
|
+
# Increments the counter stored at @stats[key][value], creating the nested
# hash and a zero counter on first use.
#
# key   - statistic name.
# value - bucket inside that statistic.
#
# Returns the updated counter.
def add_stats(key, value)
  @stats[key] = {} if @stats[key].nil?
  counters = @stats[key]
  counters[value] = 0 if counters[value].nil?
  counters[value] += 1
end
|
48
|
+
|
49
|
+
#Initializes the stats structure for the given key and value, only when necessary, and appends the text to its list
|
50
|
+
# Appends +text+ to the list stored at @stats[key][value], creating the
# nested hash and the list on first use.
#
# key   - statistic name.
# value - bucket inside that statistic.
# text  - element pushed onto the bucket's array.
#
# Returns the bucket's array after the push.
def add_text_stats(key, value, text)
  @stats[key] = {} if @stats[key].nil?
  texts_by_value = @stats[key]
  texts_by_value[value] = [] if texts_by_value[value].nil?
  texts_by_value[value].push(text)
end
|
61
|
+
|
62
|
+
# Tells whether two closed ranges share at least one position.
#
# r1_start, r1_end - bounds of the first range.
# r2_start, r2_end - bounds of the second range.
#
# Returns true when the first range starts no later than the second ends and
# ends no earlier than the second starts.
def overlapX?(r1_start, r1_end, r2_start, r2_end)
  starts_before_other_ends = r1_start <= r2_end
  ends_after_other_starts = r1_end >= r2_start
  starts_before_other_ends && ends_after_other_starts
end
|
71
|
+
|
72
|
+
# Folds +hits+ into +merged_hits+, joining every hit that overlaps an
# already-merged one.
#
# hits        - enumerable of hit objects responding to q_beg, q_end and
#               subject_id (and definition when merged_ids is given).
# merged_hits - array that receives the result; it is modified in place.
#               New entries are copies (dup) of the incoming hit.
# merged_ids  - optional array collecting each distinct hit.definition.
#
# A hit overlapping an existing entry widens that entry to cover both ranges
# and appends its subject id(s), space-separated. Only the first overlapping
# entry is updated.
def merge_hits(hits, merged_hits, merged_ids = nil)
  hits.each do |hit|
    merged_ids.push hit.definition if !merged_ids.nil? && !merged_ids.include?(hit.definition)

    overlapping = merged_hits.find { |m| overlapX?(hit.q_beg, hit.q_end, m.q_beg, m.q_end) }

    if overlapping.nil?
      # nothing covers this region yet: keep a private copy of the hit
      merged_hits.push(hit.dup)
    else
      overlapping.q_beg = [overlapping.q_beg, hit.q_beg].min
      overlapping.q_end = [overlapping.q_end, hit.q_end].max

      # Compare whole ids, not substrings: with a substring test "MID1" was
      # silently dropped whenever "MID10" had already been recorded.
      new_ids = hit.subject_id.split(' ') - overlapping.subject_id.split(' ')
      overlapping.subject_id += ' ' + new_ids.join(' ') unless new_ids.empty?
    end
  end
end
|
108
|
+
|
109
|
+
|
110
|
+
# def check_length_inserted(p_start,p_end,seq_fasta_length)
|
111
|
+
# min_insert_size = @params.get_param('min_insert_size ').to_i
|
112
|
+
# v1= p_end.to_i
|
113
|
+
# v2= p_start.to_i
|
114
|
+
# v3= v1 - v2
|
115
|
+
# $LOG.debug "------ #{v3} ----"
|
116
|
+
#
|
117
|
+
# res = true
|
118
|
+
# if ((v1 - v2 + 1) > (seq_fasta_length - min_insert_size ))
|
119
|
+
# $LOG.debug "ERROR------ SEQUENCE IS NOT GOOD ----"
|
120
|
+
# res = false
|
121
|
+
# end
|
122
|
+
# return res
|
123
|
+
# end
|
124
|
+
#------------------------------------------
|
125
|
+
# search a key into the sequence
|
126
|
+
# Used: in class PluginLinker and PluginMid
|
127
|
+
#-------------------------------------------
|
128
|
+
# def search_key (seq,key_start,key_end)
|
129
|
+
# p_q_beg=0
|
130
|
+
# p_q_end=0
|
131
|
+
# if (seq.seq_fasta[key_start..key_end]==@params.get_param('key'))
|
132
|
+
# actions=[]
|
133
|
+
# #Add ActionKey and apply it to cut the sequence
|
134
|
+
#
|
135
|
+
# type = "ActionKey"
|
136
|
+
#
|
137
|
+
# p_q_beg,p_q_end=key_start,key_end
|
138
|
+
# a = seq.new_action(p_q_beg,p_q_end,type) # adds the actionKey/actionMid to the sequence
|
139
|
+
#
|
140
|
+
# actions.push a
|
141
|
+
#
|
142
|
+
# seq.add_actions(actions) #apply cut to the sequence with the actions
|
143
|
+
# end
|
144
|
+
# return [p_q_beg,p_q_end]
|
145
|
+
#
|
146
|
+
# end
|
147
|
+
|
148
|
+
# Validates a single parameter, filling in a default when it is missing.
#
# errors        - array that receives a message for every problem found.
# params        - object responding to exists?, get_param and set_param.
# param         - name of the parameter to check.
# param_class   - 'Integer', 'Float' or 'String'; any other value skips the
#                 conversion check.
# default_value - stored through set_param when the parameter is missing.
# comment       - passed to set_param together with the default.
#
# A missing parameter with no default is reported in +errors+. An existing
# parameter is reported when the matching Kernel conversion raises.
def self.check_param(errors, params, param, param_class, default_value = nil, comment = nil)
  if !params.exists?(param)
    if default_value.nil?
      errors.push "The param #{param} is required and there is no default value available"
    else
      params.set_param(param, default_value, comment)
    end
  else
    s = params.get_param(param)
    begin
      # the strict Kernel conversions raise on values that do not parse
      case param_class
      when 'Integer' then Integer(s)
      when 'Float'   then Float(s)
      when 'String'  then String(s)
      end
    rescue StandardError
      errors.push " The param #{param} is not a valid #{param_class}: ##{s}#"
    end
  end
end
|
175
|
+
|
176
|
+
|
177
|
+
|
178
|
+
#Returns an array with the errors caused by missing or invalid parameters
|
179
|
+
# Base implementation: checks nothing and returns a new empty error list.
#
# params - unused here.
def self.check_params(params)
  []
end
|
182
|
+
|
183
|
+
|
184
|
+
# Decides whether the graph for +stats_name+ must be skipped.
#
# A statistic is ignored when it is listed in ignored_graphs, or when
# valid_graphs is non-empty and does not list it.
#
# Returns true or false.
def self.graph_ignored?(stats_name)
  return true if self.ignored_graphs.include?(stats_name)

  !(self.valid_graphs.empty? || self.valid_graphs.include?(stats_name))
end
|
193
|
+
|
194
|
+
|
195
|
+
# Base implementation: ignores all its arguments and always returns false.
def self.plot_setup(stats_value, stats_name, x, y, init_stats, plot)
  false
end
|
198
|
+
|
199
|
+
# automatically setup data
|
200
|
+
# Fills the +x+ and +y+ arrays from a statistics hash.
#
# stats_value - hash mapping bucket => count.
# stats_name  - unused here.
# x, y        - arrays that are appended to in place.
#
# When every key converts with Kernel#Integer, x receives the integer keys in
# ascending order and y the matching counts as integers. Otherwise x receives
# each key as a double-quoted label with embedded quotes removed, in hash
# order, and y the matching raw values.
def self.auto_setup(stats_value, stats_name, x, y)
  keys = stats_value.keys

  # Map integer key => count while checking that every key is numeric.
  # Looking counts up by the original key (not key.to_s) keeps Integer keys
  # working; they used to produce a count of 0.
  counts = {}
  all_integers = keys.all? do |k|
    begin
      counts[Integer(k)] = stats_value[k]
      true
    rescue StandardError
      false
    end
  end

  if all_integers
    counts.each_key { |i| x.push i }
    x.sort!
    x.each { |i| y.push counts[i].to_i }
  else # there are strings in X
    keys.each do |k|
      # '\"' in single quotes is backslash + quote, so it never matched a bare
      # double quote; strip the remaining quote characters explicitly.
      label = k.to_s.gsub('\"', '').delete(%q("'))
      x.push "\"#{label}\""
    end
    keys.each { |k| y.push stats_value[k] }
  end
end
|
249
|
+
|
250
|
+
# Default graph title: plugin name and statistic name joined by '/'.
def self.get_graph_title(plugin_name, stats_name)
  prefix = plugin_name + '/'
  prefix + stats_name
end
|
253
|
+
|
254
|
+
# Default graph file name: plugin name and statistic name joined by '_'.
def self.get_graph_filename(plugin_name, stats_name)
  prefix = plugin_name + '_'
  prefix + stats_name
end
|
257
|
+
|
258
|
+
# Statistic names that must never be plotted; the base class lists none.
def self.ignored_graphs
  []
end
|
261
|
+
|
262
|
+
# Statistic names allowed to be plotted; the base class returns an empty
# list, which graph_ignored? treats as "no restriction".
def self.valid_graphs
  []
end
|
265
|
+
|
266
|
+
|
267
|
+
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginAbAdapters
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class PluginAbAdapters < Plugin
|
11
|
+
|
12
|
+
# adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
|
13
|
+
MIN_ADAPTER_SIZE = 5
|
14
|
+
MIN_FAR_ADAPTER_SIZE = 13
|
15
|
+
MIN_LEFT_ADAPTER_SIZE = 9
|
16
|
+
#Begins the plugin's execution to look for AB adapters in the sequences "seqs"
|
17
|
+
# Runs one batch search for all sequences, then processes each sequence with
# the query result found at the same index in blasts.querys.
#
# seqs - collection of sequences.
def execute(seqs)
  blasts = do_blasts(seqs)

  seqs.each_with_index do |sequence, index|
    exec_seq(sequence, blasts.querys[index])
  end
end
|
24
|
+
|
25
|
+
# Builds a BatchBlast for the AB adapters database and runs it on all the
# sequences at once.
#
# seqs - collection of objects responding to seq_name and seq_fasta.
#
# The input handed to do_blast is a flat array alternating ">name" header
# lines and sequence strings. Returns whatever do_blast returns.
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('adapters_ab_db')}"
  extra_options = " -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}"

  blast = BatchBlast.new(db_option, 'blastn', extra_options)
  $LOG.info('BLAST:' + blast.get_blast_cmd)

  fastas = seqs.flat_map { |seq| [">" + seq.seq_name, seq.seq_fasta] }

  blast.do_blast(fastas)
end
|
45
|
+
|
46
|
+
|
47
|
+
# Turns the hits of one query into AB adapter actions on +seq+.
#
# seq         - sequence object (seq_name, seq_fasta, new_action, add_actions).
# blast_query - query result for this sequence (query_id, hits).
#
# Raises when the query id does not match the sequence name.
#
# Hits are merged until no further overlap collapses. Each merged region is
# then classified:
#   * ends within 'max_ab_to_end' positions of the sequence end -> ActionAbAdapter
#   * starts at position <= 4 and is at least MIN_LEFT_ADAPTER_SIZE long
#                                                                -> ActionAbLeftAdapter
#   * is at least MIN_FAR_ADAPTER_SIZE long                      -> ActionAbFarAdapter
#   * anything else is ignored.
# Only the left adapter is flagged as a left action; the other two are right
# actions.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"

  # first round to save adapters without overlap
  adapters = []
  merge_hits(blast_query.hits, adapters)

  # further rounds until a pass no longer reduces the number of regions
  loop do
    previous = adapters
    adapters = []
    merge_hits(previous, adapters)
    break if previous.count == adapters.count
  end

  max_to_end = @params.get_param('max_ab_to_end').to_i
  actions = []

  adapters.each do |c|
    adapter_size = c.q_end - c.q_beg + 1

    type, kind, on_right =
      if c.q_end >= seq.seq_fasta.length - max_to_end
        ['ActionAbAdapter', 'normal', true]
      elsif (c.q_beg <= 4) && (adapter_size >= MIN_LEFT_ADAPTER_SIZE)
        ['ActionAbLeftAdapter', 'left', false]
      elsif adapter_size >= MIN_FAR_ADAPTER_SIZE
        ['ActionAbFarAdapter', 'far', true]
      end
    next if type.nil?

    add_stats('adapter_type', kind)
    message = c.subject_id

    a = seq.new_action(c.q_beg, c.q_end, type)
    a.message = message
    a.reversed = c.reversed
    if on_right
      a.right_action = true # right action: the insert is what lies to the left
    else
      a.left_action = true
    end
    actions.push a

    add_stats('adapter_size', adapter_size)
    add_stats('adapter_id', message)
  end

  if !actions.empty?
    seq.add_actions(actions)
    add_stats('sequences_with_adapter', 'count')
  end
end
|
139
|
+
|
140
|
+
#Returns an array with the errors caused by missing or invalid parameters
|
141
|
+
# Declares the parameters used by the AB adapters plugin, with their defaults.
#
# params - object responding to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the errors array that was passed to every check_param call.
def self.check_params(params)
  errors = []

  params.check_param(errors, 'blast_evalue_ab', 'Float', 1,
                     'Blast E-value used as cut-off when searching for 454 AB adapters')

  params.check_param(errors, 'blast_percent_ab', 'Integer', 95,
                     'Minimum required identity (%) for a reliable 454 AB adapter')

  params.check_param(errors, 'max_ab_to_end', 'Integer', 9,
                     '454 AB adapters can be found only at the read end (not within it). The following variable indicates the number of nucleotides that are allowed for considering the AB adapters to be located at the end')

  params.check_param(errors, 'adapters_ab_db', 'DB',
                     File.join($FORMATTED_DB_PATH, 'adapters_ab.fasta'),
                     'Path for 454 AB adapters database')

  errors
end
|
163
|
+
|
164
|
+
# Human-readable title for the AB adapter graphs.
#
# plugin_name - unused here.
# stats_name  - 'adapter_type' or 'adapter_size'.
#
# Returns the title, or nil for any other statistic name.
def self.get_graph_title(plugin_name, stats_name)
  titles = {
    'adapter_type' => 'AB adapters by type',
    'adapter_size' => 'AB adapters by size'
  }
  titles[stats_name]
end
|
172
|
+
|
173
|
+
# File name for a graph: the statistic name itself; plugin_name is unused.
def self.get_graph_filename(plugin_name, stats_name)
  stats_name
end
|
183
|
+
|
184
|
+
# Only the 'adapter_type' statistic is listed as a valid graph for this plugin.
def self.valid_graphs
  %w[adapter_type]
end
|
187
|
+
|
188
|
+
|
189
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginAdapters
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class PluginAdapters < Plugin
|
11
|
+
|
12
|
+
# Chooses the action type for an adapter according to the end it is closer to.
#
# p_start, p_end - adapter bounds (converted with to_i).
# seq            - sequence responding to insert_start, insert_end, seq_fasta
#                  and seq_fasta_orig.
#
# The left distance is p_start plus insert_start. The right distance is what
# remains of seq_fasta after the adapter, plus what remains of seq_fasta_orig
# after insert_end. A tie counts as right.
#
# Returns "ActionLeftAdapter" or "ActionRightAdapter".
def get_type_adapter(p_start, p_end, seq)
  adapter_end = p_end.to_i
  adapter_start = p_start.to_i

  distance_to_left = adapter_start + seq.insert_start
  distance_to_right = seq.seq_fasta.length - adapter_end - 1 + seq.seq_fasta_orig.length - seq.insert_end - 1

  distance_to_left < distance_to_right ? "ActionLeftAdapter" : "ActionRightAdapter"
end
|
34
|
+
|
35
|
+
|
36
|
+
# Decides whether an adapter should be handled as a right action.
#
# adapter - hit responding to q_beg and q_end.
# seq     - sequence responding to insert_start and insert_end.
#
# Returns true when the size on the left of the adapter is larger than half
# the size on its right; negative sizes are clamped to zero first.
def cut_by_right(adapter, seq)
  left_size = [adapter.q_beg - seq.insert_start + 1, 0].max
  right_size = [seq.insert_end - adapter.q_end + 1, 0].max

  half_of_right = (right_size / 2).to_i
  left_size > half_of_right
end
|
46
|
+
|
47
|
+
#Begins the plugin's execution to look for adapters in the sequences "seqs"
|
48
|
+
# Runs one batch search for all sequences, then processes each sequence with
# the query result found at the same index in blasts.querys.
#
# seqs - collection of sequences.
def execute(seqs)
  blasts = do_blasts(seqs)

  seqs.each_with_index do |sequence, index|
    exec_seq(sequence, blasts.querys[index])
  end
end
|
55
|
+
|
56
|
+
# Builds a BatchBlast for the adapters database and runs it on all the
# sequences at once.
#
# seqs - collection of objects responding to seq_name and seq_fasta.
#
# The input handed to do_blast is a flat array alternating ">name" header
# lines and sequence strings. Returns whatever do_blast returns.
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('adapters_db')}"
  extra_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}"

  blast = BatchBlast.new(db_option, 'blastn', extra_options)
  $LOG.info('BLAST:' + blast.get_blast_cmd)

  fastas = seqs.flat_map { |seq| [">" + seq.seq_name, seq.seq_fasta] }

  blast.do_blast(fastas)
end
|
76
|
+
|
77
|
+
|
78
|
+
# Turns the hits of one query into adapter actions on +seq+.
#
# seq         - sequence object (seq_name, new_action, add_actions, plus what
#               get_type_adapter and cut_by_right read).
# blast_query - query result for this sequence (query_id, hits).
#
# Raises when the query id does not match the sequence name.
#
# Hits are merged until no further overlap collapses. Every merged region
# becomes an action whose type comes from get_type_adapter and whose
# left/right flag comes from cut_by_right. The size of each adapter is counted
# under the 'adapter_size' statistic.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"

  # first round to save adapters without overlap; merge_hits iterates over
  # hits, so it must be given the query's hit list, not the query itself
  adapters = []
  merge_hits(blast_query.hits, adapters)

  # further rounds until a pass no longer reduces the number of regions
  begin
    adapters2 = adapters
    adapters = []
    merge_hits(adapters2, adapters)
  end until (adapters2.count == adapters.count)

  actions = []

  # Make sure the key exists, but keep counts from sequences handled earlier
  # in the same batch (an unconditional reset here erased them).
  @stats['adapter_size'] = {} if @stats['adapter_size'].nil?

  adapters.each do |ad| # adds the corresponding action to the sequence
    type = get_type_adapter(ad.q_beg, ad.q_end, seq)
    a = seq.new_action(ad.q_beg, ad.q_end, type)

    if cut_by_right(ad, seq)
      a.right_action = true # right action: the insert is what lies to the left
    else
      a.left_action = true  # left action: the insert is what lies to the right
    end

    a.message = ad.subject_id
    a.reversed = ad.reversed
    actions.push a

    add_stats('adapter_size', ad.q_end - ad.q_beg + 1)
  end

  seq.add_actions(actions)
end
|
144
|
+
|
145
|
+
#Returns an array with the errors caused by missing or invalid parameters
|
146
|
+
# Declares the parameters used by the adapters plugin, with their defaults.
#
# params - object responding to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the errors array that was passed to every check_param call.
def self.check_params(params)
  errors = []

  params.check_param(errors, 'blast_evalue_adapters', 'Float', 1e-6,
                     'Blast E-value used as cut-off when searching for adapters or primers')

  params.check_param(errors, 'blast_percent_adapters', 'Integer', 95,
                     'Minimum required identity (%) for a reliable adapter')

  params.check_param(errors, 'adapters_db', 'DB',
                     File.join($FORMATTED_DB_PATH, 'adapters.fasta'),
                     'Path for adapter database')

  errors
end
|
163
|
+
|
164
|
+
|
165
|
+
end
|