seqtrimnext 2.0.49 → 2.0.50

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.50 2012-06-13
2
+
3
+ Added classification plugin
4
+
1
5
  === 2.0.49 2012-05-24
2
6
 
3
7
  Updated gem dependencies
data/Manifest.txt CHANGED
@@ -24,6 +24,7 @@ History.txt
24
24
  lib/seqtrimnext/actions/action_ab_adapter.rb
25
25
  lib/seqtrimnext/actions/action_ab_far_adapter.rb
26
26
  lib/seqtrimnext/actions/action_ab_left_adapter.rb
27
+ lib/seqtrimnext/actions/action_classify.rb
27
28
  lib/seqtrimnext/actions/action_empty_insert.rb
28
29
  lib/seqtrimnext/actions/action_ignore_repeated.rb
29
30
  lib/seqtrimnext/actions/action_indetermination.rb
@@ -77,6 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
77
78
  lib/seqtrimnext/plugins/plugin_adapters_old.rb
78
79
  lib/seqtrimnext/plugins/plugin_amplicons.rb
79
80
  lib/seqtrimnext/plugins/plugin_contaminants.rb
81
+ lib/seqtrimnext/plugins/plugin_classify.rb
80
82
  lib/seqtrimnext/plugins/plugin_extract_inserts.rb
81
83
  lib/seqtrimnext/plugins/plugin_find_poly_at.rb
82
84
  lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -0,0 +1,23 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the action that the classify plugin attaches to a sequence
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
class ActionClassify < SeqtrimAction
  # Creates the action for the region delimited by start_pos and end_pos.
  #
  # The positions are forwarded unchanged to SeqtrimAction#initialize.
  # @cut is then set to false -- presumably so that this action only tags
  # the sequence and never trims it (TODO confirm in SeqtrimAction).
  def initialize(start_pos, end_pos)
    super
    @cut = false
  end

  # Decorates one character of a region covered by this action.
  #
  # Returns the result of calling #yellow on char (assumes the project
  # provides a colouring extension on String -- verify where it is defined).
  def apply_decoration(char)
    char.yellow
  end
end
@@ -144,6 +144,7 @@ class SeqtrimWorker < ScbiMapreduce::Worker
144
144
  # find mids
145
145
  mid = seq.get_actions(ActionMid).first
146
146
 
147
+
147
148
  if (seq.seq_rejected) # sequence rejected
148
149
 
149
150
  #save to rejected sequences
@@ -203,6 +203,8 @@ class Seqtrim
203
203
  format = :sanger
204
204
  end
205
205
 
206
+ $LOG.info("Used FastQ format for input files: #{format}")
207
+
206
208
  sequence_reader = FastqFile.new(seqs_path,'r',format, true)
207
209
  # cd_hit_input_file = 'cd-hit-input.fasta'
208
210
  cd_hit_input_file = seqs_path
@@ -79,7 +79,6 @@ class Plugin
79
79
  c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
80
80
  # puts " c #{c.inspect}"
81
81
 
82
-
83
82
  if (c.nil?)
84
83
  # add new contaminant
85
84
  #puts "NEW HIT #{hit.inspect}"
@@ -0,0 +1,143 @@
1
+ require "plugin"
2
+
3
+ require "make_blast_db"
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines the main methods that are necessary to execute Pluginclassify
8
+ # Inherit: Plugin
9
+ ########################################################
10
+
11
class PluginClassify < Plugin
  # Maximum number of different aligned sequences to keep from the BLAST database.
  # NOTE(review): the BLAST command built in #do_blasts does not use this value;
  # the -max_target_seqs option only appears in a trailing comment there.
  MAX_TARGETS_SEQS = 4

  # Tells whether hit +c+ lies very near to either extreme of +seq+.
  #
  # c             - hit responding to q_beg and q_end
  # seq           - sequence responding to seq_fasta
  # min_cont_size - minimum hit size; half of it is the tolerated distance
  #
  # Returns true when the hit begins within that distance of the start, or
  # ends within that distance of the last position of seq.seq_fasta.
  def near_to_extrem(c, seq, min_cont_size)
    max_to_extreme = (min_cont_size / 2).to_i

    (c.q_beg - max_to_extreme < 0) ||
      ((c.q_end + max_to_extreme) >= seq.seq_fasta.size - 1)
  end

  # Adds up the aligned query length of every hit, grouped by hit definition.
  #
  # hits - enumerable of hits responding to definition, q_beg and q_end
  #
  # Returns a Hash mapping each definition to the sum of (q_end - q_beg + 1)
  # over its hits. The totals are written to the debug log rather than to
  # stdout, so per-sequence diagnostics no longer pollute the program output.
  def sum_hits_by_id(hits)
    res = hits.each_with_object({}) do |hit, totals|
      hit_size = hit.q_end - hit.q_beg + 1
      totals[hit.definition] = (totals[hit.definition] || 0) + hit_size
    end

    $LOG.debug "[#{self.class}]: hit sizes by definition: #{res.inspect}"
    res
  end

  # Begins the plugin's execution: blasts all the sequences in one batch and
  # then classifies each sequence with the BLAST query found at the same index.
  #
  # seqs - array of sequences (each responding to seq_name and seq_fasta)
  def execute(seqs)
    blasts = do_blasts(seqs)

    seqs.each_with_index { |s, i| exec_seq(s, blasts.querys[i]) }
  end

  # Runs a single blastn search for all the given sequences.
  #
  # seqs - array of sequences (each responding to seq_name and seq_fasta)
  #
  # Returns whatever BatchBlast#do_blast returns for the :xml format; #execute
  # reads its querys collection.
  def do_blasts(seqs)
    # TODO - Culling limit = 2 because blast fails with this command when it is
    # given cl=1 and dust=no and a low-complexity sequence as input.
    # NOTE(review): the command below actually passes -culling_limit 1, which
    # contradicts the note above -- confirm which value is intended.
    blast = BatchBlast.new("-db #{@params.get_param('classify_db')}", 'blastn', " -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") # get classify -max_target_seqs #{MAX_TARGETS_SEQS}

    $LOG.debug('BLAST:' + blast.get_blast_cmd(:xml))

    # FASTA input as alternating header / sequence lines
    fastas = seqs.flat_map { |seq| [">#{seq.seq_name}", seq.seq_fasta] }

    blast.do_blast(fastas, :xml)
  end

  # Classifies one sequence from its BLAST results.
  #
  # The definition that accumulates the largest aligned size (see
  # #sum_hits_by_id) is chosen. An ActionClassify action is added to the
  # sequence with that definition as message, the sequence is tagged with a
  # file tag derived from it, and the 'classify_size' / 'classify_ids' stats
  # are recorded. Nothing happens when the query has no hits.
  #
  # The original consistency check between blast_query.query_id and
  # seq.seq_name had its raise commented out and therefore did nothing; it is
  # omitted here, so results are matched to sequences purely by position.
  #
  # NOTE(review): the 'min_classify_hit_size' parameter declared in
  # .check_params was read here but never applied as a filter -- confirm
  # whether a minimum-size check is still wanted.
  #
  # seq         - sequence to annotate
  # blast_query - BLAST query result for seq (responds to hits)
  def exec_seq(seq, blast_query)
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"

    sizes_by_definition = sum_hits_by_id(blast_query.hits)
    return if sizes_by_definition.empty?

    definition, classify_size = sizes_by_definition.max_by { |_, size| size }

    # adds the correspondent action to the sequence
    a = seq.new_action(-1, -1, 'ActionClassify')
    a.message = definition
    a.tag_id = definition.tr(' ', '_')

    # save to this file
    seq.add_file_tag(1, a.tag_id, :file)

    add_stats('classify_size', classify_size)
    add_stats('classify_ids', definition)

    seq.add_actions([a])
  end

  # Returns an array with the errors due to parameters that are missing.
  #
  # Every parameter used by this plugin is passed to params.check_param
  # together with its expected type, default value and description.
  def self.check_params(params)
    errors = []

    comment = 'Blast E-value used as cut-off when searching for contaminations'
    default_value = 1e-10
    params.check_param(errors, 'blast_evalue_classify', 'Float', default_value, comment)

    comment = 'Minimum required identity (%) for a reliable classify'
    default_value = 85
    params.check_param(errors, 'blast_percent_classify', 'Integer', default_value, comment)

    comment = 'Minimum hit size (nt) for considering to classify'
    default_value = 30 # was 40
    params.check_param(errors, 'min_classify_hit_size', 'Integer', default_value, comment)

    comment = 'Path for classify database'
    default_value = File.join($FORMATTED_DB_PATH, 'classify.fasta')
    params.check_param(errors, 'classify_db', 'DB', default_value, comment)

    errors
  end
end
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.49'
33
+ VERSION = '2.0.50'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.49
5
+ version: 2.0.50
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-05-24 00:00:00 Z
13
+ date: 2012-06-13 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -201,6 +201,7 @@ files:
201
201
  - lib/seqtrimnext/actions/action_ab_adapter.rb
202
202
  - lib/seqtrimnext/actions/action_ab_far_adapter.rb
203
203
  - lib/seqtrimnext/actions/action_ab_left_adapter.rb
204
+ - lib/seqtrimnext/actions/action_classify.rb
204
205
  - lib/seqtrimnext/actions/action_empty_insert.rb
205
206
  - lib/seqtrimnext/actions/action_ignore_repeated.rb
206
207
  - lib/seqtrimnext/actions/action_indetermination.rb
@@ -254,6 +255,7 @@ files:
254
255
  - lib/seqtrimnext/plugins/plugin_adapters_old.rb
255
256
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
256
257
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
258
+ - lib/seqtrimnext/plugins/plugin_classify.rb
257
259
  - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
258
260
  - lib/seqtrimnext/plugins/plugin_find_poly_at.rb
259
261
  - lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -319,7 +321,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
321
  requirements: []
320
322
 
321
323
  rubyforge_project: seqtrimnext
322
- rubygems_version: 1.7.2
324
+ rubygems_version: 1.8.24
323
325
  signing_key:
324
326
  specification_version: 3
325
327
  summary: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data