seqtrimnext 2.0.39 → 2.0.41

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,13 @@
1
+ === 2.0.41 2011-11-04
2
+
3
+ Added extra adapters plugin for transcriptomics
4
+
5
+ === 2.0.40 2011-10-27
6
+
7
+ -Support Relative paths in user DB
8
+ -Can mix relative and absolute paths in user DB
9
+ -Custom linkers database
10
+
1
11
  === 2.0.39 2011-07-07
2
12
 
3
13
  Fixed database list option
data/Manifest.txt CHANGED
@@ -41,6 +41,7 @@ lib/seqtrimnext/actions/action_poly_a.rb
41
41
  lib/seqtrimnext/actions/action_poly_t.rb
42
42
  lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
43
43
  lib/seqtrimnext/actions/action_right_adapter.rb
44
+ lib/seqtrimnext/actions/action_middle_adapter.rb
44
45
  lib/seqtrimnext/actions/action_right_primer.rb
45
46
  lib/seqtrimnext/actions/action_short_insert.rb
46
47
  lib/seqtrimnext/actions/action_unexpected_poly_t.rb
data/README.rdoc CHANGED
@@ -118,15 +118,26 @@ The same way you can modify any of the parameters. You can find all parameters a
118
118
  == REQUIREMENTS:
119
119
 
120
120
  * Ruby 1.9.2
121
+ * CD-HIT 4.5.3 or greater
121
122
  * Blast plus 2.24 or greater (prior versions have bugs that produce bad results)
122
123
  * [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
123
124
  * [Optional] - pdflatex, to produce a detailed report with results
124
125
 
126
+
125
127
  == INSTALL:
126
128
 
129
+ === Installing CD-HIT
130
+
131
+ *Download the latest version from http://code.google.com/p/cdhit/downloads/list
132
+ *You can also use a precompiled version if you like
133
+ *To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
134
+
135
+ make
136
+ sudo make install
137
+
127
138
  === Installing Blast
128
139
 
129
- *Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release/LATEST/
140
+ *Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
130
141
  *You can also use a precompiled version if you like
131
142
  *To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
132
143
 
@@ -0,0 +1,29 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute Plugin1
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class ActionMiddleAdapter < SeqtrimAction
11
+
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true
15
+
16
+ end
17
+
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class}. BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+
25
+ def apply_decoration(char)
26
+ return char.magenta
27
+ end
28
+
29
+ end
@@ -13,6 +13,7 @@ class Params
13
13
  # @param_order={}
14
14
  @mids = {}
15
15
  @ab_adapters={}
16
+ @adapters={}
16
17
  @linkers = {}
17
18
  @clusters = {}
18
19
 
@@ -55,17 +56,36 @@ class Params
55
56
  end
56
57
  end# end def
57
58
 
58
- def load_db_fastas(path_file)
59
+ def load_db_fastas(input_paths)
60
+
59
61
  res={}
60
- if File.exists?(path_file)
61
- ff = FastaFile.new(path_file)
62
- ff.each {|n,f|
63
- # @mid_sizes[n]=f.size
64
- res[n]=f
65
- }
66
-
67
- ff.close
62
+
63
+ if (!input_paths.nil?) & (input_paths!='')
64
+ # remove quotes
65
+ paths=input_paths.gsub(/\A['"]+|['"]+\Z/, "")
66
+
67
+ # split paths by spaces
68
+ # puts "PATHS:"
69
+ # puts paths.split(' ')
70
+ paths.split(' ').each do |path_file|
71
+
72
+ if File.exists?(path_file)
73
+ ff = FastaFile.new(path_file)
74
+ ff.each {|n,f|
75
+ res[n]=f
76
+ }
77
+
78
+ ff.close
79
+ end
80
+ end
81
+
68
82
  end
83
+
84
+ # puts "LOADED_DB #{paths}:"
85
+ # res.each do |k,v|
86
+ # puts k
87
+ # end
88
+
69
89
  return res
70
90
  end
71
91
 
@@ -78,13 +98,19 @@ class Params
78
98
  # Load ab_adapters file
79
99
  def load_ab_adapters(path_file)
80
100
  @ab_adapters=load_db_fastas(path_file)
81
- # puts @ab_adapters
101
+ # puts @ab_adapters
102
+ end
103
+
104
+ # load normal adapters
105
+ def load_adapters(path_file)
106
+ @adapters=load_db_fastas(path_file)
82
107
  end
83
108
 
109
+
84
110
  # Load mid's file
85
111
  def load_linkers(path_file)
86
112
  @linkers=load_db_fastas(path_file)
87
- # puts @linkers
113
+ # puts @linkers
88
114
  end
89
115
 
90
116
  def load_repeated_seqs(file_path)
@@ -156,19 +182,38 @@ class Params
156
182
  return @params[param]
157
183
  end
158
184
 
185
+ def get_fasta(list,name,type)
186
+ res = list[name]
187
+
188
+ if res.nil?
189
+ $LOG.error("Error. The #{type}: #{name} was not correctly loaded")
190
+ raise "Error. The #{type}: #{name} was not found in loaded #{name}s: #{list.map{|k,v| k}}."
191
+ end
192
+
193
+ return res
194
+ end
195
+
159
196
  # Return the mid's size of param
160
- def get_mid(param)
161
- return @mids[param]
197
+ def get_mid(mid)
198
+ # return @mids[mid]
199
+ return get_fasta(@mids,mid,"mid")
162
200
  end
163
201
 
164
202
  # Return the linker of param
165
203
  def get_linker(linker)
166
- return @linkers[linker]
204
+ # return @linkers[linker]
205
+ return get_fasta(@linkers,linker,"linker")
167
206
  end
168
207
 
169
208
  # Return the ab of param
170
209
  def get_ab_adapter(adapter)
171
- return @ab_adapters[adapter]
210
+ # return @ab_adapters[adapter]
211
+ return get_fasta(@ab_adapters,adapter,"ab_adapter")
212
+ end
213
+
214
+ def get_adapter(adapter)
215
+ # return @adapters[adapter]
216
+ return get_fasta(@adapters,adapter,"adapter")
172
217
  end
173
218
 
174
219
 
@@ -300,16 +345,21 @@ class Params
300
345
  # expand database paths
301
346
  dbs= get_param(db_param_name).gsub('"','').split(/\s+/)
302
347
  # puts "ALGO"*20
303
- puts dbs.join(',')
348
+ # puts "INPUT DATABASES:\n"+dbs.join(',')
349
+
350
+ procesed_dbs=[]
304
351
  #
305
352
  # TODO - chequear aqui que la db no esta vacia y que esta formateada.
306
- dbs.reverse_each {|db|
353
+ dbs.reverse_each {|db_p|
354
+ db=File.expand_path(db_p)
355
+
307
356
  if !File.exists?(db)
308
- path=File.join($FORMATTED_DB_PATH,db)
357
+ path=File.join($FORMATTED_DB_PATH,db_p)
309
358
  else
310
359
  path=db
311
360
  end
312
361
 
362
+
313
363
  if Dir.glob(path+'*.n*').entries.empty?
314
364
  puts "DB file #{path} not formatted"
315
365
 
@@ -321,15 +371,19 @@ class Params
321
371
  end
322
372
  end
323
373
 
374
+ procesed_dbs << path
375
+
324
376
  if !File.exists?(path)
325
377
  raise "DB File #{path} does not exists"
326
378
  # exit
327
379
  end
328
380
  }
329
- db_paths = '"'+dbs.join(' ')+'"'
381
+
382
+ db_paths = '"'+procesed_dbs.join(' ')+'"'
330
383
 
331
384
  set_param(db_param_name,db_paths)
332
- # puts "DATABASES"+db_paths
385
+
386
+ puts "USED DATABASES\n"+db_paths
333
387
  end
334
388
 
335
389
 
@@ -351,7 +405,7 @@ class Params
351
405
  end
352
406
 
353
407
  def check_param(errors,param,param_class,default_value=nil, comment=nil)
354
-
408
+
355
409
  if !exists?(param)
356
410
  if default_value.nil? #|| (default_value.is_a?(String) && default_value.empty?)
357
411
  errors.push "The param #{param} is required and no default value is available"
@@ -360,41 +414,41 @@ class Params
360
414
  end
361
415
  end
362
416
 
363
- s = get_param(param)
417
+ s = get_param(param)
364
418
 
365
419
 
366
- set_comment(get_plugin,param,comment)
420
+ set_comment(get_plugin,param,comment)
367
421
 
368
- # check_class=Object.const_get(param_class)
369
- begin
422
+ # check_class=Object.const_get(param_class)
423
+ begin
370
424
 
371
- case param_class
372
- when 'Integer'
373
- r = Integer(s)
374
- when 'Float'
375
- r = Float(s)
376
- when 'String'
377
- r = String(s)
378
- when 'DB'
379
- # it is a string
380
- r = String(s)
381
- # and must be a valid db
382
-
383
- r = check_db_param(errors,param)
425
+ case param_class
426
+ when 'Integer'
427
+ r = Integer(s)
428
+ when 'Float'
429
+ r = Float(s)
430
+ when 'String'
431
+ r = String(s)
432
+ when 'DB'
433
+ # it is a string
434
+ r = String(s)
435
+ # and must be a valid db
384
436
 
385
- when 'PluginList'
386
- r=String(s)
387
- r= check_plugin_list_param(errors,param)
388
- end
437
+ r = check_db_param(errors,param)
389
438
 
390
- rescue Exception => e
391
- message="Current value is ##{s}#. "
392
- if param_class=='DB'
393
- message += e.message
394
- end
439
+ when 'PluginList'
440
+ r=String(s)
441
+ r= check_plugin_list_param(errors,param)
442
+ end
395
443
 
396
- errors.push "Param #{param} is not a valid #{param_class}. #{message}"
444
+ rescue Exception => e
445
+ message="Current value is ##{s}#. "
446
+ if param_class=='DB'
447
+ message += e.message
397
448
  end
449
+
450
+ errors.push "Param #{param} is not a valid #{param_class}. #{message}"
451
+ end
398
452
  # end
399
453
 
400
454
  end
@@ -230,14 +230,15 @@ class Seqtrim
230
230
  #MakeBlastDb.format_db(es.truncated_file_path,File.basename(es.truncated_file_path,File.extname(es.truncated_file_path)),'./') if piro_on
231
231
 
232
232
  # leer mids
233
- params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
234
- params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
235
- params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
233
+ # params.load_mids(File.join($FORMATTED_DB_PATH,'mids.fasta'))
234
+ # params.load_ab_adapters(File.join($FORMATTED_DB_PATH,'adapters_ab.fasta'))
235
+ # params.load_linkers(File.join($FORMATTED_DB_PATH,'linkers.fasta'))
236
236
 
237
+ params.load_mids(params.get_param('mids_db'))
238
+ params.load_ab_adapters(params.get_param('adapters_ab_db'))
239
+ params.load_adapters(params.get_param('adapters_db'))
240
+ params.load_linkers(params.get_param('linkers_db'))
237
241
 
238
-
239
-
240
-
241
242
  #execute cd-hit
242
243
  if params.get_param('remove_clonality')=='true'
243
244
 
@@ -71,7 +71,8 @@ class Plugin
71
71
 
72
72
  def merge_hits(hits,merged_hits,merged_ids=nil)
73
73
  # puts " merging ============"
74
- hits.each do |hit|
74
+ # hits.each do |hit|
75
+ hits.sort{|h1,h2| (h1.q_end-h1.q_beg+1)<=>(h2.q_end-h2.q_beg+1)}.reverse_each do |hit|
75
76
 
76
77
  merged_ids.push hit.definition if !merged_ids.nil? && (! merged_ids.include?(hit.definition))
77
78
  # if new hit's position is already contained in hits, then ignore the new hit
@@ -86,20 +87,26 @@ class Plugin
86
87
  #contaminants.push({:q_begin=>hit.q_beg,:q_end=>hit.q_end,:name=>hit.subject_id})
87
88
  #
88
89
  else
89
- # merge with old contaminant
90
- min=[c.q_beg,hit.q_beg].min
91
- max=[c.q_end,hit.q_end].max
92
90
 
93
- c.q_beg=min
94
- c.q_end=max
91
+ # one is inside each other, just ignore
92
+ if ((hit.q_beg>=c.q_beg && hit.q_end <=c.q_end) || (c.q_beg>=hit.q_beg && c.q_end <= hit.q_end))
93
+ # puts "* #{hit.subject_id} inside #{c.subject_id}"
94
+ else
95
+ # merge with old contaminant
96
+ # puts "#{hit.subject_id} NOT inside #{c.subject_id}"
97
+ min=[c.q_beg,hit.q_beg].min
98
+ max=[c.q_end,hit.q_end].max
95
99
 
100
+ c.q_beg=min
101
+ c.q_end=max
96
102
 
97
- # DONE para describir cada Id contaminante encontrado
98
- # puts "1 -#{c.subject_id}- -#{hit.subject_id}-"
99
- c.subject_id += ' ' + hit.subject_id if (not c.subject_id.include?(hit.subject_id))
100
- # puts "2 -#{c.subject_id}- -#{hit.subject_id}-"
101
- # puts "MERGE HIT (#{c.inspect})"
102
103
 
104
+ # DONE para describir cada Id contaminante encontrado
105
+ # puts "1 -#{c.subject_id}- -#{hit.subject_id}-"
106
+ c.subject_id += ' ' + hit.subject_id if (not c.subject_id.include?(hit.subject_id))
107
+ # puts "2 -#{c.subject_id}- -#{hit.subject_id}-"
108
+ # puts "MERGE HIT (#{c.inspect})"
109
+ end
103
110
  #
104
111
  end
105
112
 
@@ -9,41 +9,10 @@ require "plugin"
9
9
 
10
10
  class PluginAdapters < Plugin
11
11
 
12
- def get_type_adapter(p_start,p_end,seq)
13
- #if q_beg is nearer the left, add adapter action by the left,
14
- #if q_end esta is nearer the right , add adapter action by the right
15
- #NOTE: If the adapter is very near from left and rigth,
16
- #then the sequence isn't valid, because almost sequence is adapter.
17
-
18
-
19
- v1= p_end.to_i
20
- v2= p_start.to_i
21
-
22
- # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
23
-
24
- # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
25
- if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
26
- type = "ActionLeftAdapter"
27
-
28
- else
29
- type = "ActionRightAdapter"
30
-
31
- end
32
- return type
33
- end
34
-
35
-
36
- def cut_by_right(adapter,seq)
37
-
38
- left_size = adapter.q_beg-seq.insert_start+1
39
- right_size = seq.insert_end-adapter.q_end+1
40
- left_size=0 if (left_size<0)
41
- right_size=0 if (right_size<0)
42
-
43
- return (left_size>(right_size/2).to_i)
44
-
45
- end
46
-
12
+ # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
+ MIN_ADAPTER_SIZE = 5
14
+ MIN_FAR_ADAPTER_SIZE = 13
15
+ # MIN_LEFT_ADAPTER_SIZE = 9
47
16
  #Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
48
17
  def execute(seqs)
49
18
  blasts= do_blasts(seqs)
@@ -55,7 +24,7 @@ class PluginAdapters < Plugin
55
24
 
56
25
  def do_blasts(seqs)
57
26
  # find MIDS with less results than max_target_seqs value
58
- blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
27
+ blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
59
28
  $LOG.info('BLAST:'+blast.get_blast_cmd)
60
29
 
61
30
  fastas=[]
@@ -67,99 +36,179 @@ class PluginAdapters < Plugin
67
36
 
68
37
  # fastas=fastas.join("\n")
69
38
 
70
- blast_table_results = blast.do_blast(fastas)
39
+ blast_table_results = blast.do_blast(fastas,:xml)
71
40
 
72
41
  # puts blast_table_results.inspect
73
42
 
74
43
  return blast_table_results
75
44
  end
76
45
 
46
+ # filter hits that are in the middle and do not have a valid length
47
+ def filter_hits(hits,end_pos)
48
+
49
+ hits.reverse_each do |hit|
50
+ if (hit.q_beg>10) && (hit.q_end < (end_pos-10)) && ((hit.q_end-hit.q_beg+1)<(@params.get_adapter(hit.subject_id).length*0.85).to_i)
51
+ hits.delete(hit)
52
+ # puts "- DELETE #{hit.subject_id} #{(hit.q_end-hit.q_beg+1)}, < #{(@params.get_adapter(hit.subject_id).length*0.85).to_i} - R:#{hit.reversed}"
53
+ #
54
+ # else
55
+ # puts " ** ACCEPTED #{hit.subject_id} #{hit.q_beg}>6 and #{hit.q_end}<#{end_pos}-10, #{(hit.q_end-hit.q_beg+1)}, >= #{(@params.get_adapter(hit.subject_id).length*0.85).to_i} - R:#{hit.reversed}"
56
+ # puts " *** #{hit.inspect}"
57
+ end
58
+ end
59
+
60
+ end
77
61
 
78
62
  def exec_seq(seq,blast_query)
79
63
  if blast_query.query_id != seq.seq_name
80
- raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
64
+ # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
81
65
  end
82
66
 
83
67
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
84
68
 
85
69
 
86
- # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
70
+ # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
71
+
72
+
87
73
 
88
74
  # blast with only one sequence, no with many sequences from a database
89
75
  #---------------------------------------------------------------------
90
76
 
91
77
  # blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
92
78
 
93
- #blast_table_results = BlastTableResult.new(res)
94
-
95
- # blast_table_results.inspect
79
+ #BlastTableResult.new(res)
80
+ # puts blast_query.inspect
81
+ # puts blast_table_results.inspect
82
+
83
+ filter_hits(blast_query.hits, seq.seq_fasta.length)
96
84
 
97
85
  adapters=[]
98
86
  # blast_table_results.querys.each do |query| # first round to save adapters without overlap
99
- merge_hits(blast_query,adapters)
87
+ merge_hits(blast_query.hits,adapters)
100
88
  # end
101
89
 
102
90
  begin
103
91
  adapters2=adapters # second round to save adapters without overlap
104
92
  adapters = []
105
93
  merge_hits(adapters2,adapters)
106
- end until (adapters2.count == adapters.count)
107
-
108
- actions=[]
109
- adapter_size=0
110
- # @stats['adapter_size']={}
111
- adapters.each do |ad| # adds the correspondent action to the sequence
112
-
113
- type = get_type_adapter(ad.q_beg,ad.q_end,seq)
114
- a = seq.new_action(ad.q_beg,ad.q_end,type)
115
- # puts " state left_action #{a.left_action} right_action #{a.right_action}"
116
-
117
-
118
- adapter_size=ad.q_end-ad.q_beg+1
119
-
120
- if cut_by_right(ad,seq)
94
+ end until (adapters2.count == adapters.count)
95
+
96
+ # puts "MERGED"
97
+ # puts "="*50
98
+ # adapters.each {|a| puts a.inspect}
99
+
100
+ max_to_end=@params.get_param('max_adapters_to_end').to_i
101
+ # type = 'ActionAbAdapter'
102
+ actions=[]
103
+ adapter_size=0
104
+
105
+ #@stats['adapter_size']={}
106
+ adapters.each do |c| # adds the correspondent action to the sequence
107
+ # puts "is the adapter near to the end of sequence ? #{c.q_end+seq.insert_start+max_to_end} >= ? #{seq.seq_fasta_orig.size-1}"
108
+ adapter_size=c.q_end-c.q_beg+1
109
+ #if ((c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1)
110
+ right_action = true
121
111
 
122
- # puts "action right end1 #{seq.insert_end}"
112
+ #if ab adapter is very near to the end of original sequence
113
+ if c.q_end>=seq.seq_fasta.length-max_to_end
114
+ # message = c.subject_id
115
+ message = c.definition
116
+ type = 'ActionRightAdapter'
117
+ ignore=false
118
+ add_stats('adapter_type','right')
123
119
 
124
- a.right_action=true #mark rigth action to get the left insert
125
- else
126
-
127
- # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
128
-
129
- a.left_action = true #mark left action to get the right insert
120
+ elsif (c.q_beg <= 6) #&& (adapter_size>=MIN_LEFT_ADAPTER_SIZE) #left adapter
121
+ # message = c.subject_id
122
+ message = c.definition
123
+ type = 'ActionLeftAdapter'
124
+ ignore = false
125
+ right_action = false
126
+ add_stats('adapter_type','left')
127
+ elsif (adapter_size>=MIN_FAR_ADAPTER_SIZE)
128
+ # message = c.subject_id
129
+ message = c.definition
130
+ type = 'ActionMiddleAdapter'
131
+ ignore = false
132
+ add_stats('adapter_type','middle')
133
+ else
134
+ ignore=true
135
+ end
130
136
 
131
- end
132
-
133
- a.message = ad.subject_id
134
- a.reversed = ad.reversed
135
- actions.push a
136
-
137
- # @stats[:adapter_size]={adapter_size => 1}
138
- add_stats('adapter_size',adapter_size)
137
+ if !ignore
138
+ a = seq.new_action(c.q_beg,c.q_end,type)
139
+ a.message = message
140
+ a.reversed = c.reversed
141
+ if right_action
142
+ a.right_action = true #mark as rigth action to get the left insert
143
+ else
144
+ a.left_action = true
145
+ end
146
+ actions.push a
139
147
 
148
+ # puts "adapter_size #{adapter_size}"
149
+
150
+ #@stats[:adapter_size]={adapter_size => 1}
151
+ add_stats('adapter_size',adapter_size)
152
+ add_stats('adapter_id',message)
153
+ end
140
154
  end
141
- seq.add_actions(actions)
155
+
156
+ if !actions.empty?
157
+ seq.add_actions(actions)
158
+ add_stats('sequences_with_adapter','count')
159
+ end
160
+
161
+
142
162
  #
143
163
  end
144
164
 
145
165
  #Returns an array with the errors due to parameters are missing
146
166
  def self.check_params(params)
147
- errors=[]
167
+ errors=[]
148
168
 
149
- comment='Blast E-value used as cut-off when searching for adapters or primers'
150
- default_value = 1e-6
169
+ comment='Blast E-value used as cut-off when searching for adapters'
170
+ # default_value = 1e-6
171
+ default_value = 1
151
172
  params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
152
173
 
153
174
  comment='Minimum required identity (%) for a reliable adapter'
154
175
  default_value = 95
155
176
  params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
177
+
178
+ comment='Adapters can be found at both ends of the sequence. The following variable indicates the number of nucleotides that are allowed for considering the adapters to be located at the right end'
179
+ default_value = 9
180
+ params.check_param(errors,'max_adapters_to_end','Integer',default_value,comment)
156
181
 
157
- comment='Path for adapter database'
182
+ comment='Path for adapters database'
158
183
  default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
159
- params.check_param(errors,'adapters_db','DB',default_value,comment)
184
+ params.check_param(errors,'adapters_db','DB',default_value,comment)
160
185
 
161
186
  return errors
162
187
  end
163
188
 
189
+ def self.get_graph_title(plugin_name,stats_name)
190
+ case stats_name
191
+ when 'adapter_type'
192
+ 'Adapters by type'
193
+ when 'adapter_size'
194
+ 'Adapters by size'
195
+ end
196
+ end
197
+
198
+ def self.get_graph_filename(plugin_name,stats_name)
199
+ return stats_name
200
+
201
+ # case stats_name
202
+ # when 'adapter_type'
203
+ # 'AB adapters by type'
204
+ # when 'adapter_size'
205
+ # 'AB adapters by size'
206
+ # end
207
+ end
208
+
209
+ def self.valid_graphs
210
+ return ['adapter_type']
211
+ end
212
+
164
213
 
165
214
  end
@@ -81,7 +81,7 @@ class PluginLinker < Plugin
81
81
 
82
82
  def do_blasts(seqs)
83
83
  # find MIDS with less results than max_target_seqs value
84
- blast = BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'linkers.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
84
+ blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
85
85
 
86
86
  $LOG.info('BLAST:'+blast.get_blast_cmd)
87
87
 
@@ -223,6 +223,10 @@ class PluginLinker < Plugin
223
223
  comment='Minimum required identity (%) for a reliable linker'
224
224
  default_value = 95
225
225
  params.check_param(errors,'blast_percent_linkers','Integer',default_value,comment)
226
+
227
+ comment='Path for 454 linkers database'
228
+ default_value = File.join($FORMATTED_DB_PATH,'linkers.fasta')
229
+ params.check_param(errors,'linkers_db','DB',default_value,comment)
226
230
 
227
231
 
228
232
  return errors
@@ -3,6 +3,6 @@
3
3
  # ======================================
4
4
 
5
5
 
6
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
6
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
7
7
 
8
8
  contaminants_db="contaminants.fasta cont_ribosome.fasta"
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.39'
33
+ VERSION = '2.0.41'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.39
5
+ version: 2.0.41
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-07-07 00:00:00 Z
13
+ date: 2011-11-07 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -214,6 +214,7 @@ files:
214
214
  - lib/seqtrimnext/actions/action_poly_t.rb
215
215
  - lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
216
216
  - lib/seqtrimnext/actions/action_right_adapter.rb
217
+ - lib/seqtrimnext/actions/action_middle_adapter.rb
217
218
  - lib/seqtrimnext/actions/action_right_primer.rb
218
219
  - lib/seqtrimnext/actions/action_short_insert.rb
219
220
  - lib/seqtrimnext/actions/action_unexpected_poly_t.rb