seqtrimnext 2.0.39 → 2.0.41
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +10 -0
- data/Manifest.txt +1 -0
- data/README.rdoc +12 -1
- data/lib/seqtrimnext/actions/action_middle_adapter.rb +29 -0
- data/lib/seqtrimnext/classes/params.rb +102 -48
- data/lib/seqtrimnext/classes/seqtrim.rb +7 -6
- data/lib/seqtrimnext/plugins/plugin.rb +18 -11
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +128 -79
- data/lib/seqtrimnext/plugins/plugin_linker.rb +5 -1
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +1 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +3 -2
data/History.txt
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
=== 2.0.41 2011-11-04
|
2
|
+
|
3
|
+
Added extra adapters plugin for transcriptomics
|
4
|
+
|
5
|
+
=== 2.0.40 2011-10-27
|
6
|
+
|
7
|
+
-Support Relative paths in user DB
|
8
|
+
-Can mix relative and absolute paths in user DB
|
9
|
+
-Custom linkers database
|
10
|
+
|
1
11
|
=== 2.0.39 2011-07-07
|
2
12
|
|
3
13
|
Fixed database list option
|
data/Manifest.txt
CHANGED
@@ -41,6 +41,7 @@ lib/seqtrimnext/actions/action_poly_a.rb
|
|
41
41
|
lib/seqtrimnext/actions/action_poly_t.rb
|
42
42
|
lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
|
43
43
|
lib/seqtrimnext/actions/action_right_adapter.rb
|
44
|
+
lib/seqtrimnext/actions/action_middle_adapter.rb
|
44
45
|
lib/seqtrimnext/actions/action_right_primer.rb
|
45
46
|
lib/seqtrimnext/actions/action_short_insert.rb
|
46
47
|
lib/seqtrimnext/actions/action_unexpected_poly_t.rb
|
data/README.rdoc
CHANGED
@@ -118,15 +118,26 @@ The same way you can modify any of the parameters. You can find all parameters a
|
|
118
118
|
== REQUIREMENTS:
|
119
119
|
|
120
120
|
* Ruby 1.9.2
|
121
|
+
* CD-HIT 4.5.3 or greater
|
121
122
|
* Blast plus 2.24 or greater (prior versions have bugs that produce bad results)
|
122
123
|
* [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
|
123
124
|
* [Optional] - pdflatex - Optional, to produce a detailed report with results
|
124
125
|
|
126
|
+
|
125
127
|
== INSTALL:
|
126
128
|
|
129
|
+
=== Installing CD-HIT
|
130
|
+
|
131
|
+
*Download the latest version from http://code.google.com/p/cdhit/downloads/list
|
132
|
+
*You can also use a precompiled version if you like
|
133
|
+
*To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
|
134
|
+
|
135
|
+
make
|
136
|
+
sudo make install
|
137
|
+
|
127
138
|
=== Installing Blast
|
128
139
|
|
129
|
-
*Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/
|
140
|
+
*Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
|
130
141
|
*You can also use a precompiled version if you like
|
131
142
|
*To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
|
132
143
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute Plugin1
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class ActionMiddleAdapter < SeqtrimAction
|
11
|
+
|
12
|
+
def initialize(start_pos,end_pos)
|
13
|
+
super(start_pos,end_pos)
|
14
|
+
@cut =true
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
# def apply_to(seq)
|
19
|
+
#
|
20
|
+
# # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
|
21
|
+
# $LOG.debug " Applying #{self.class}. BEGIN: #{@start_pos} END: #{@end_pos} "
|
22
|
+
#
|
23
|
+
# end
|
24
|
+
|
25
|
+
def apply_decoration(char)
|
26
|
+
return char.magenta
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
@@ -13,6 +13,7 @@ class Params
|
|
13
13
|
# @param_order={}
|
14
14
|
@mids = {}
|
15
15
|
@ab_adapters={}
|
16
|
+
@adapters={}
|
16
17
|
@linkers = {}
|
17
18
|
@clusters = {}
|
18
19
|
|
@@ -55,17 +56,36 @@ class Params
|
|
55
56
|
end
|
56
57
|
end# end def
|
57
58
|
|
58
|
-
def load_db_fastas(
|
59
|
+
def load_db_fastas(input_paths)
|
60
|
+
|
59
61
|
res={}
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
62
|
+
|
63
|
+
if (!input_paths.nil?) & (input_paths!='')
|
64
|
+
# remove quotes
|
65
|
+
paths=input_paths.gsub(/\A['"]+|['"]+\Z/, "")
|
66
|
+
|
67
|
+
# split paths by spaces
|
68
|
+
# puts "PATHS:"
|
69
|
+
# puts paths.split(' ')
|
70
|
+
paths.split(' ').each do |path_file|
|
71
|
+
|
72
|
+
if File.exists?(path_file)
|
73
|
+
ff = FastaFile.new(path_file)
|
74
|
+
ff.each {|n,f|
|
75
|
+
res[n]=f
|
76
|
+
}
|
77
|
+
|
78
|
+
ff.close
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
68
82
|
end
|
83
|
+
|
84
|
+
# puts "LOADED_DB #{paths}:"
|
85
|
+
# res.each do |k,v|
|
86
|
+
# puts k
|
87
|
+
# end
|
88
|
+
|
69
89
|
return res
|
70
90
|
end
|
71
91
|
|
@@ -78,13 +98,19 @@ class Params
|
|
78
98
|
# Load ab_adapters file
|
79
99
|
def load_ab_adapters(path_file)
|
80
100
|
@ab_adapters=load_db_fastas(path_file)
|
81
|
-
|
101
|
+
# puts @ab_adapters
|
102
|
+
end
|
103
|
+
|
104
|
+
# load normal adapters
|
105
|
+
def load_adapters(path_file)
|
106
|
+
@adapters=load_db_fastas(path_file)
|
82
107
|
end
|
83
108
|
|
109
|
+
|
84
110
|
# Load linkers file
|
85
111
|
def load_linkers(path_file)
|
86
112
|
@linkers=load_db_fastas(path_file)
|
87
|
-
|
113
|
+
# puts @linkers
|
88
114
|
end
|
89
115
|
|
90
116
|
def load_repeated_seqs(file_path)
|
@@ -156,19 +182,38 @@ class Params
|
|
156
182
|
return @params[param]
|
157
183
|
end
|
158
184
|
|
185
|
+
def get_fasta(list,name,type)
|
186
|
+
res = list[name]
|
187
|
+
|
188
|
+
if res.nil?
|
189
|
+
$LOG.error("Error. The #{type}: #{name} was not correctly loaded")
|
190
|
+
raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
|
191
|
+
end
|
192
|
+
|
193
|
+
return res
|
194
|
+
end
|
195
|
+
|
159
196
|
# Return the mid's size of param
|
160
|
-
def get_mid(
|
161
|
-
return @mids[
|
197
|
+
def get_mid(mid)
|
198
|
+
# return @mids[mid]
|
199
|
+
return get_fasta(@mids,mid,"mid")
|
162
200
|
end
|
163
201
|
|
164
202
|
# Return the linker of param
|
165
203
|
def get_linker(linker)
|
166
|
-
return @linkers[linker]
|
204
|
+
# return @linkers[linker]
|
205
|
+
return get_fasta(@linkers,linker,"linker")
|
167
206
|
end
|
168
207
|
|
169
208
|
# Return the ab of param
|
170
209
|
def get_ab_adapter(adapter)
|
171
|
-
return @ab_adapters[adapter]
|
210
|
+
# return @ab_adapters[adapter]
|
211
|
+
return get_fasta(@ab_adapters,adapter,"ab_adapter")
|
212
|
+
end
|
213
|
+
|
214
|
+
def get_adapter(adapter)
|
215
|
+
# return @adapters[adapter]
|
216
|
+
return get_fasta(@adapters,adapter,"adapter")
|
172
217
|
end
|
173
218
|
|
174
219
|
|
@@ -300,16 +345,21 @@ class Params
|
|
300
345
|
# expand database paths
|
301
346
|
dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
|
302
347
|
# puts "ALGO"*20
|
303
|
-
puts dbs.join(',')
|
348
|
+
# puts "INPUT DATABASES:\n"+dbs.join(',')
|
349
|
+
|
350
|
+
procesed_dbs=[]
|
304
351
|
#
|
305
352
|
# TODO - check here that the db is not empty and that it is formatted.
|
306
|
-
dbs.reverse_each {|
|
353
|
+
dbs.reverse_each {|db_p|
|
354
|
+
db=File.expand_path(db_p)
|
355
|
+
|
307
356
|
if !File.exists?(db)
|
308
|
-
path=File.join($FORMATTED_DB_PATH,
|
357
|
+
path=File.join($FORMATTED_DB_PATH,db_p)
|
309
358
|
else
|
310
359
|
path=db
|
311
360
|
end
|
312
361
|
|
362
|
+
|
313
363
|
if Dir.glob(path+'*.n*').entries.empty?
|
314
364
|
puts "DB file #{path} not formatted"
|
315
365
|
|
@@ -321,15 +371,19 @@ class Params
|
|
321
371
|
end
|
322
372
|
end
|
323
373
|
|
374
|
+
procesed_dbs << path
|
375
|
+
|
324
376
|
if !File.exists?(path)
|
325
377
|
raise "DB File #{path} does not exists"
|
326
378
|
# exit
|
327
379
|
end
|
328
380
|
}
|
329
|
-
|
381
|
+
|
382
|
+
db_paths = '"'+procesed_dbs.join(' ')+'"'
|
330
383
|
|
331
384
|
set_param(db_param_name,db_paths)
|
332
|
-
|
385
|
+
|
386
|
+
puts "USED DATABASES\n"+db_paths
|
333
387
|
end
|
334
388
|
|
335
389
|
|
@@ -351,7 +405,7 @@ class Params
|
|
351
405
|
end
|
352
406
|
|
353
407
|
def check_param(errors,param,param_class,default_value=nil, comment=nil)
|
354
|
-
|
408
|
+
|
355
409
|
if !exists?(param)
|
356
410
|
if default_value.nil? #|| (default_value.is_a?(String) && default_value.empty?)
|
357
411
|
errors.push "The param #{param} is required and no default value is available"
|
@@ -360,41 +414,41 @@ class Params
|
|
360
414
|
end
|
361
415
|
end
|
362
416
|
|
363
|
-
|
417
|
+
s = get_param(param)
|
364
418
|
|
365
419
|
|
366
|
-
|
420
|
+
set_comment(get_plugin,param,comment)
|
367
421
|
|
368
|
-
|
369
|
-
|
422
|
+
# check_class=Object.const_get(param_class)
|
423
|
+
begin
|
370
424
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
r = check_db_param(errors,param)
|
425
|
+
case param_class
|
426
|
+
when 'Integer'
|
427
|
+
r = Integer(s)
|
428
|
+
when 'Float'
|
429
|
+
r = Float(s)
|
430
|
+
when 'String'
|
431
|
+
r = String(s)
|
432
|
+
when 'DB'
|
433
|
+
# it is a string
|
434
|
+
r = String(s)
|
435
|
+
# and must be a valid db
|
384
436
|
|
385
|
-
|
386
|
-
r=String(s)
|
387
|
-
r= check_plugin_list_param(errors,param)
|
388
|
-
end
|
437
|
+
r = check_db_param(errors,param)
|
389
438
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
end
|
439
|
+
when 'PluginList'
|
440
|
+
r=String(s)
|
441
|
+
r= check_plugin_list_param(errors,param)
|
442
|
+
end
|
395
443
|
|
396
|
-
|
444
|
+
rescue Exception => e
|
445
|
+
message="Current value is ##{s}#. "
|
446
|
+
if param_class=='DB'
|
447
|
+
message += e.message
|
397
448
|
end
|
449
|
+
|
450
|
+
errors.push "Param #{param} is not a valid #{param_class}. #{message}"
|
451
|
+
end
|
398
452
|
# end
|
399
453
|
|
400
454
|
end
|
@@ -230,14 +230,15 @@ class Seqtrim
|
|
230
230
|
#MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
|
231
231
|
|
232
232
|
# read mids
|
233
|
-
params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
|
234
|
-
params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
|
235
|
-
params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
|
233
|
+
# params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
|
234
|
+
# params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
|
235
|
+
# params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
|
236
236
|
|
237
|
+
params.load_mids(params.get_param('mids_db'))
|
238
|
+
params.load_ab_adapters(params.get_param('adapters_ab_db'))
|
239
|
+
params.load_adapters(params.get_param('adapters_db'))
|
240
|
+
params.load_linkers(params.get_param('linkers_db'))
|
237
241
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
242
|
#execute cd-hit
|
242
243
|
if params.get_param('remove_clonality')=='true'
|
243
244
|
|
@@ -71,7 +71,8 @@ class Plugin
|
|
71
71
|
|
72
72
|
def merge_hits(hits,merged_hits,merged_ids=nil)
|
73
73
|
# puts " merging ============"
|
74
|
-
hits.each do |hit|
|
74
|
+
# hits.each do |hit|
|
75
|
+
hits.sort{|h1,h2| (h1.q_end-h1.q_beg+1)<=>(h2.q_end-h2.q_beg+1)}.reverse_each do |hit|
|
75
76
|
|
76
77
|
merged_ids.push hit.definition if !merged_ids.nil? && (! merged_ids.include?(hit.definition))
|
77
78
|
# if new hit's position is already contained in hits, then ignore the new hit
|
@@ -86,20 +87,26 @@ class Plugin
|
|
86
87
|
#contaminants.push({:q_begin=>hit.q_beg,:q_end=>hit.q_end,:name=>hit.subject_id})
|
87
88
|
#
|
88
89
|
else
|
89
|
-
# merge with old contaminant
|
90
|
-
min=[c.q_beg,hit.q_beg].min
|
91
|
-
max=[c.q_end,hit.q_end].max
|
92
90
|
|
93
|
-
|
94
|
-
c.q_end
|
91
|
+
# one is inside each other, just ignore
|
92
|
+
if ((hit.q_beg>=c.q_beg && hit.q_end <=c.q_end) || (c.q_beg>=hit.q_beg && c.q_end <= hit.q_end))
|
93
|
+
# puts "* #{hit.subject_id} inside #{c.subject_id}"
|
94
|
+
else
|
95
|
+
# merge with old contaminant
|
96
|
+
# puts "#{hit.subject_id} NOT inside #{c.subject_id}"
|
97
|
+
min=[c.q_beg,hit.q_beg].min
|
98
|
+
max=[c.q_end,hit.q_end].max
|
95
99
|
|
100
|
+
c.q_beg=min
|
101
|
+
c.q_end=max
|
96
102
|
|
97
|
-
# DONE para describir cada Id contaminante encontrado
|
98
|
-
# puts "1 -#{c.subject_id}- -#{hit.subject_id}-"
|
99
|
-
c.subject_id += ' ' + hit.subject_id if (not c.subject_id.include?(hit.subject_id))
|
100
|
-
# puts "2 -#{c.subject_id}- -#{hit.subject_id}-"
|
101
|
-
# puts "MERGE HIT (#{c.inspect})"
|
102
103
|
|
104
|
+
# DONE para describir cada Id contaminante encontrado
|
105
|
+
# puts "1 -#{c.subject_id}- -#{hit.subject_id}-"
|
106
|
+
c.subject_id += ' ' + hit.subject_id if (not c.subject_id.include?(hit.subject_id))
|
107
|
+
# puts "2 -#{c.subject_id}- -#{hit.subject_id}-"
|
108
|
+
# puts "MERGE HIT (#{c.inspect})"
|
109
|
+
end
|
103
110
|
#
|
104
111
|
end
|
105
112
|
|
@@ -9,41 +9,10 @@ require "plugin"
|
|
9
9
|
|
10
10
|
class PluginAdapters < Plugin
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
#then the sequence isn't valid, because almost sequence is adapter.
|
17
|
-
|
18
|
-
|
19
|
-
v1= p_end.to_i
|
20
|
-
v2= p_start.to_i
|
21
|
-
|
22
|
-
# puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
|
23
|
-
|
24
|
-
# puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
|
25
|
-
if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
|
26
|
-
type = "ActionLeftAdapter"
|
27
|
-
|
28
|
-
else
|
29
|
-
type = "ActionRightAdapter"
|
30
|
-
|
31
|
-
end
|
32
|
-
return type
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
def cut_by_right(adapter,seq)
|
37
|
-
|
38
|
-
left_size = adapter.q_beg-seq.insert_start+1
|
39
|
-
right_size = seq.insert_end-adapter.q_end+1
|
40
|
-
left_size=0 if (left_size<0)
|
41
|
-
right_size=0 if (right_size<0)
|
42
|
-
|
43
|
-
return (left_size>(right_size/2).to_i)
|
44
|
-
|
45
|
-
end
|
46
|
-
|
12
|
+
# Adapters found at the end of a sequence can be as short as 2 nt; the cut-off is set to 5 for statistical reasons
|
13
|
+
MIN_ADAPTER_SIZE = 5
|
14
|
+
MIN_FAR_ADAPTER_SIZE = 13
|
15
|
+
# MIN_LEFT_ADAPTER_SIZE = 9
|
47
16
|
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
48
17
|
def execute(seqs)
|
49
18
|
blasts= do_blasts(seqs)
|
@@ -55,7 +24,7 @@ class PluginAdapters < Plugin
|
|
55
24
|
|
56
25
|
def do_blasts(seqs)
|
57
26
|
# find MIDS with less results than max_target_seqs value
|
58
|
-
blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -
|
27
|
+
blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
|
59
28
|
$LOG.info('BLAST:'+blast.get_blast_cmd)
|
60
29
|
|
61
30
|
fastas=[]
|
@@ -67,99 +36,179 @@ class PluginAdapters < Plugin
|
|
67
36
|
|
68
37
|
# fastas=fastas.join("\n")
|
69
38
|
|
70
|
-
blast_table_results = blast.do_blast(fastas)
|
39
|
+
blast_table_results = blast.do_blast(fastas,:xml)
|
71
40
|
|
72
41
|
# puts blast_table_results.inspect
|
73
42
|
|
74
43
|
return blast_table_results
|
75
44
|
end
|
76
45
|
|
46
|
+
# filter hits that are in the middle and do not have a valid length
|
47
|
+
def filter_hits(hits,end_pos)
|
48
|
+
|
49
|
+
hits.reverse_each do |hit|
|
50
|
+
if (hit.q_beg>10) && (hit.q_end < (end_pos-10)) && ((hit.q_end-hit.q_beg+1)<(@params.get_adapter(hit.subject_id).length*0.85).to_i)
|
51
|
+
hits.delete(hit)
|
52
|
+
# puts "- DELETE #{hit.subject_id} #{(hit.q_end-hit.q_beg+1)}, < #{(@params.get_adapter(hit.subject_id).length*0.85).to_i} - R:#{hit.reversed}"
|
53
|
+
#
|
54
|
+
# else
|
55
|
+
# puts " ** ACCEPTED #{hit.subject_id} #{hit.q_beg}>6 and #{hit.q_end}<#{end_pos}-10, #{(hit.q_end-hit.q_beg+1)}, >= #{(@params.get_adapter(hit.subject_id).length*0.85).to_i} - R:#{hit.reversed}"
|
56
|
+
# puts " *** #{hit.inspect}"
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
77
61
|
|
78
62
|
def exec_seq(seq,blast_query)
|
79
63
|
if blast_query.query_id != seq.seq_name
|
80
|
-
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
64
|
+
# raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
81
65
|
end
|
82
66
|
|
83
67
|
$LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
|
84
68
|
|
85
69
|
|
86
|
-
# blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
70
|
+
# blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
|
71
|
+
|
72
|
+
|
87
73
|
|
88
74
|
# blast with only one sequence, not with many sequences from a database
|
89
75
|
#---------------------------------------------------------------------
|
90
76
|
|
91
77
|
# blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
|
92
78
|
|
93
|
-
|
94
|
-
|
95
|
-
|
79
|
+
#BlastTableResult.new(res)
|
80
|
+
# puts blast_query.inspect
|
81
|
+
# puts blast_table_results.inspect
|
82
|
+
|
83
|
+
filter_hits(blast_query.hits, seq.seq_fasta.length)
|
96
84
|
|
97
85
|
adapters=[]
|
98
86
|
# blast_table_results.querys.each do |query| # first round to save adapters without overlap
|
99
|
-
merge_hits(blast_query,adapters)
|
87
|
+
merge_hits(blast_query.hits,adapters)
|
100
88
|
# end
|
101
89
|
|
102
90
|
begin
|
103
91
|
adapters2=adapters # second round to save adapters without overlap
|
104
92
|
adapters = []
|
105
93
|
merge_hits(adapters2,adapters)
|
106
|
-
end until (adapters2.count == adapters.count)
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
94
|
+
end until (adapters2.count == adapters.count)
|
95
|
+
|
96
|
+
# puts "MERGED"
|
97
|
+
# puts "="*50
|
98
|
+
# adapters.each {|a| puts a.inspect}
|
99
|
+
|
100
|
+
max_to_end=@params.get_param('max_adapters_to_end').to_i
|
101
|
+
# type = 'ActionAbAdapter'
|
102
|
+
actions=[]
|
103
|
+
adapter_size=0
|
104
|
+
|
105
|
+
#@stats['adapter_size']={}
|
106
|
+
adapters.each do |c| # adds the correspondent action to the sequence
|
107
|
+
# puts "is the adapter near to the end of sequence ? #{c.q_end+seq.insert_start+max_to_end} >= ? #{seq.seq_fasta_orig.size-1}"
|
108
|
+
adapter_size=c.q_end-c.q_beg+1
|
109
|
+
#if ((c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1)
|
110
|
+
right_action = true
|
121
111
|
|
122
|
-
#
|
112
|
+
#if ab adapter is very near to the end of original sequence
|
113
|
+
if c.q_end>=seq.seq_fasta.length-max_to_end
|
114
|
+
# message = c.subject_id
|
115
|
+
message = c.definition
|
116
|
+
type = 'ActionRightAdapter'
|
117
|
+
ignore=false
|
118
|
+
add_stats('adapter_type','right')
|
123
119
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
120
|
+
elsif (c.q_beg <= 6) #&& (adapter_size>=MIN_LEFT_ADAPTER_SIZE) #left adapter
|
121
|
+
# message = c.subject_id
|
122
|
+
message = c.definition
|
123
|
+
type = 'ActionLeftAdapter'
|
124
|
+
ignore = false
|
125
|
+
right_action = false
|
126
|
+
add_stats('adapter_type','left')
|
127
|
+
elsif (adapter_size>=MIN_FAR_ADAPTER_SIZE)
|
128
|
+
# message = c.subject_id
|
129
|
+
message = c.definition
|
130
|
+
type = 'ActionMiddleAdapter'
|
131
|
+
ignore = false
|
132
|
+
add_stats('adapter_type','middle')
|
133
|
+
else
|
134
|
+
ignore=true
|
135
|
+
end
|
130
136
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
137
|
+
if !ignore
|
138
|
+
a = seq.new_action(c.q_beg,c.q_end,type)
|
139
|
+
a.message = message
|
140
|
+
a.reversed = c.reversed
|
141
|
+
if right_action
|
142
|
+
a.right_action = true #mark as rigth action to get the left insert
|
143
|
+
else
|
144
|
+
a.left_action = true
|
145
|
+
end
|
146
|
+
actions.push a
|
139
147
|
|
148
|
+
# puts "adapter_size #{adapter_size}"
|
149
|
+
|
150
|
+
#@stats[:adapter_size]={adapter_size => 1}
|
151
|
+
add_stats('adapter_size',adapter_size)
|
152
|
+
add_stats('adapter_id',message)
|
153
|
+
end
|
140
154
|
end
|
141
|
-
|
155
|
+
|
156
|
+
if !actions.empty?
|
157
|
+
seq.add_actions(actions)
|
158
|
+
add_stats('sequences_with_adapter','count')
|
159
|
+
end
|
160
|
+
|
161
|
+
|
142
162
|
#
|
143
163
|
end
|
144
164
|
|
145
165
|
#Returns an array with the errors caused by missing or invalid parameters
|
146
166
|
def self.check_params(params)
|
147
|
-
errors=[]
|
167
|
+
errors=[]
|
148
168
|
|
149
|
-
comment='Blast E-value used as cut-off when searching for adapters
|
150
|
-
default_value = 1e-6
|
169
|
+
comment='Blast E-value used as cut-off when searching for adapters'
|
170
|
+
# default_value = 1e-6
|
171
|
+
default_value = 1
|
151
172
|
params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
|
152
173
|
|
153
174
|
comment='Minimum required identity (%) for a reliable adapter'
|
154
175
|
default_value = 95
|
155
176
|
params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
|
177
|
+
|
178
|
+
comment='Adapters can be found at both ends of the sequence. The following variable indicates the number of nucleotides that are allowed for considering the adapters to be located at the right end'
|
179
|
+
default_value = 9
|
180
|
+
params.check_param(errors,'max_adapters_to_end','Integer',default_value,comment)
|
156
181
|
|
157
|
-
comment='Path for
|
182
|
+
comment='Path for adapters database'
|
158
183
|
default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
|
159
|
-
params.check_param(errors,'adapters_db','DB',default_value,comment)
|
184
|
+
params.check_param(errors,'adapters_db','DB',default_value,comment)
|
160
185
|
|
161
186
|
return errors
|
162
187
|
end
|
163
188
|
|
189
|
+
def self.get_graph_title(plugin_name,stats_name)
|
190
|
+
case stats_name
|
191
|
+
when 'adapter_type'
|
192
|
+
'Adapters by type'
|
193
|
+
when 'adapter_size'
|
194
|
+
'Adapters by size'
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def self.get_graph_filename(plugin_name,stats_name)
|
199
|
+
return stats_name
|
200
|
+
|
201
|
+
# case stats_name
|
202
|
+
# when 'adapter_type'
|
203
|
+
# 'AB adapters by type'
|
204
|
+
# when 'adapter_size'
|
205
|
+
# 'AB adapters by size'
|
206
|
+
# end
|
207
|
+
end
|
208
|
+
|
209
|
+
def self.valid_graphs
|
210
|
+
return ['adapter_type']
|
211
|
+
end
|
212
|
+
|
164
213
|
|
165
214
|
end
|
@@ -81,7 +81,7 @@ class PluginLinker < Plugin
|
|
81
81
|
|
82
82
|
def do_blasts(seqs)
|
83
83
|
# find MIDS with less results than max_target_seqs value
|
84
|
-
blast = BatchBlast.new("-db #{
|
84
|
+
blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
|
85
85
|
|
86
86
|
$LOG.info('BLAST:'+blast.get_blast_cmd)
|
87
87
|
|
@@ -223,6 +223,10 @@ class PluginLinker < Plugin
|
|
223
223
|
comment='Minimum required identity (%) for a reliable linker'
|
224
224
|
default_value = 95
|
225
225
|
params.check_param(errors,'blast_percent_linkers','Integer',default_value,comment)
|
226
|
+
|
227
|
+
comment='Path for 454 linkers database'
|
228
|
+
default_value = File.join($FORMATTED_DB_PATH,'linkers.fasta')
|
229
|
+
params.check_param(errors,'linkers_db','DB',default_value,comment)
|
226
230
|
|
227
231
|
|
228
232
|
return errors
|
@@ -3,6 +3,6 @@
|
|
3
3
|
# ======================================
|
4
4
|
|
5
5
|
|
6
|
-
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
6
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
7
7
|
|
8
8
|
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
data/lib/seqtrimnext.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.
|
5
|
+
version: 2.0.41
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-11-07 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -214,6 +214,7 @@ files:
|
|
214
214
|
- lib/seqtrimnext/actions/action_poly_t.rb
|
215
215
|
- lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
|
216
216
|
- lib/seqtrimnext/actions/action_right_adapter.rb
|
217
|
+
- lib/seqtrimnext/actions/action_middle_adapter.rb
|
217
218
|
- lib/seqtrimnext/actions/action_right_primer.rb
|
218
219
|
- lib/seqtrimnext/actions/action_short_insert.rb
|
219
220
|
- lib/seqtrimnext/actions/action_unexpected_poly_t.rb
|