seqtrimnext 2.0.49 → 2.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.50 2012-06-13
2
+
3
+ Added classification plugin
4
+
1
5
  === 2.0.49 2012-05-24
2
6
 
3
7
  Updated gem dependencies
data/Manifest.txt CHANGED
@@ -24,6 +24,7 @@ History.txt
24
24
  lib/seqtrimnext/actions/action_ab_adapter.rb
25
25
  lib/seqtrimnext/actions/action_ab_far_adapter.rb
26
26
  lib/seqtrimnext/actions/action_ab_left_adapter.rb
27
+ lib/seqtrimnext/actions/action_classify.rb
27
28
  lib/seqtrimnext/actions/action_empty_insert.rb
28
29
  lib/seqtrimnext/actions/action_ignore_repeated.rb
29
30
  lib/seqtrimnext/actions/action_indetermination.rb
@@ -77,6 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
77
78
  lib/seqtrimnext/plugins/plugin_adapters_old.rb
78
79
  lib/seqtrimnext/plugins/plugin_amplicons.rb
79
80
  lib/seqtrimnext/plugins/plugin_contaminants.rb
81
+ lib/seqtrimnext/plugins/plugin_classify.rb
80
82
  lib/seqtrimnext/plugins/plugin_extract_inserts.rb
81
83
  lib/seqtrimnext/plugins/plugin_find_poly_at.rb
82
84
  lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -0,0 +1,23 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the action that the classification plugin attaches to a sequence
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
# Action recorded on a sequence when it has been classified.
#
# Construction is delegated to SeqtrimAction; this subclass only sets @cut to
# false and chooses the decoration used when the action is displayed.
class ActionClassify < SeqtrimAction

  # start_pos, end_pos: passed through unchanged to SeqtrimAction#initialize.
  def initialize(start_pos, end_pos)
    super
    # NOTE(review): presumably tells the pipeline that this action does not
    # trim the sequence -- confirm against SeqtrimAction.
    @cut = false
  end

  # Returns +char+ decorated by calling its #yellow method.
  def apply_decoration(char)
    char.yellow
  end

end
@@ -144,6 +144,7 @@ class SeqtrimWorker < ScbiMapreduce::Worker
144
144
  # find mids
145
145
  mid = seq.get_actions(ActionMid).first
146
146
 
147
+
147
148
  if (seq.seq_rejected) # sequence rejected
148
149
 
149
150
  #save to rejected sequences
@@ -203,6 +203,8 @@ class Seqtrim
203
203
  format = :sanger
204
204
  end
205
205
 
206
+ $LOG.info("Used FastQ format for input files: #{format}")
207
+
206
208
  sequence_reader = FastqFile.new(seqs_path,'r',format, true)
207
209
  # cd_hit_input_file = 'cd-hit-input.fasta'
208
210
  cd_hit_input_file = seqs_path
@@ -79,7 +79,6 @@ class Plugin
79
79
  c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
80
80
  # puts " c #{c.inspect}"
81
81
 
82
-
83
82
  if (c.nil?)
84
83
  # add new contaminant
85
84
  #puts "NEW HIT #{hit.inspect}"
@@ -0,0 +1,143 @@
1
+ require "plugin"
2
+
3
+ require "make_blast_db"
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines the main methods that are necessary to execute PluginClassify
8
+ # Inherit: Plugin
9
+ ########################################################
10
+
11
# Classification plugin.
#
# BLASTs every sequence against the database named by the 'classify_db'
# parameter, adds up the aligned query bases per database definition and tags
# each sequence with the definition that accumulates the largest total.
class PluginClassify < Plugin

  # Maximum number of different aligned sequences to keep from the BLAST
  # database.
  # NOTE(review): not referenced by the command built in #do_blasts (no
  # -max_target_seqs option is passed) -- confirm whether it is still needed.
  MAX_TARGETS_SEQS = 4

  # Tells whether hit +c+ lies close to either end of the sequence.
  #
  # c             - hit responding to #q_beg and #q_end.
  # seq           - sequence responding to #seq_fasta.
  # min_cont_size - size whose half is used as the allowed distance.
  #
  # Returns true when the hit starts less than min_cont_size/2 positions from
  # the beginning, or ends within that distance of the last position.
  def near_to_extrem(c, seq, min_cont_size)
    max_to_extreme = (min_cont_size / 2).to_i
    near_start = (c.q_beg - max_to_extreme) < 0
    near_start || (c.q_end + max_to_extreme) >= seq.seq_fasta.size - 1
  end

  # Adds up the query length covered by the hits of each definition.
  #
  # hits - enumerable of hits responding to #definition, #q_beg and #q_end.
  #
  # Returns a Hash mapping each definition to the sum of
  # (q_end - q_beg + 1) over its hits.
  def sum_hits_by_id(hits)
    totals = hits.each_with_object({}) do |hit, acc|
      hit_size = hit.q_end - hit.q_beg + 1
      acc[hit.definition] = (acc[hit.definition] || 0) + hit_size
    end

    # Diagnostic output goes through the logger, like the rest of this class,
    # instead of being printed on stdout for every sequence.
    $LOG.debug "[#{self.class}]: hit sizes by definition: #{totals.inspect}"

    totals
  end

  # Runs the plugin over a batch: one BLAST run for all +seqs+, then each
  # sequence is processed with the query result found at the same index.
  def execute(seqs)
    blasts = do_blasts(seqs)

    seqs.each_with_index do |seq, index|
      exec_seq(seq, blasts.querys[index])
    end
  end

  # Builds a FASTA listing from +seqs+ and runs blastn on it.
  #
  # Returns whatever BatchBlast#do_blast returns for the :xml format.
  def do_blasts(seqs)
    # TODO - Culling limit = 2, because blast fails with this command when it
    # is given cl=1 and dust=no together with a low-complexity input sequence.
    # NOTE(review): the command below passes -culling_limit 1, which does not
    # match the TODO above -- confirm which value is intended.
    db_option = "-db #{@params.get_param('classify_db')}"
    blast_options = " -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1"
    blast = BatchBlast.new(db_option, 'blastn', blast_options)

    $LOG.debug('BLAST:' + blast.get_blast_cmd(:xml))

    # Two entries per sequence: the ">name" header line and the bases.
    fastas = seqs.flat_map { |seq| ['>' + seq.seq_name, seq.seq_fasta] }

    blast.do_blast(fastas, :xml)
  end

  # Classifies one sequence from its BLAST query result.
  #
  # When the query has hits, an ActionClassify action is added to +seq+ whose
  # message is the winning definition and whose tag_id is that definition
  # with spaces replaced by underscores; the tag is also registered as a file
  # tag and the 'classify_size' / 'classify_ids' stats are updated.
  # Nothing is done when there are no hits.
  def exec_seq(seq, blast_query)
    # NOTE(review): nothing verifies that blast_query.query_id matches
    # seq.seq_name; the pairing relies on #execute matching them by index.
    # NOTE(review): the 'min_classify_hit_size' parameter declared in
    # .check_params is not applied here -- confirm whether hits should be
    # filtered by size.
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"

    totals = sum_hits_by_id(blast_query.hits)

    # Definition with the largest accumulated hit size; nil when no hits.
    best = totals.max_by { |_definition, size| size }
    return if best.nil?

    definition, classify_size = best

    # adds the correspondent action to the sequence
    action = seq.new_action(-1, -1, 'ActionClassify')
    action.message = definition
    action.tag_id = definition.tr(' ', '_')

    # save to this file
    seq.add_file_tag(1, action.tag_id, :file)

    add_stats('classify_size', classify_size)
    add_stats('classify_ids', definition)

    seq.add_actions([action])
  end

  # Declares the parameters used by this plugin together with their default
  # values.
  #
  # Returns the array of errors filled in by params.check_param.
  def self.check_params(params)
    errors = []

    comment = 'Blast E-value used as cut-off when searching for classifications'
    default_value = 1e-10
    params.check_param(errors, 'blast_evalue_classify', 'Float', default_value, comment)

    comment = 'Minimum required identity (%) for a reliable classify'
    default_value = 85
    params.check_param(errors, 'blast_percent_classify', 'Integer', default_value, comment)

    comment = 'Minimum hit size (nt) for considering to classify'
    default_value = 30 # was 40
    params.check_param(errors, 'min_classify_hit_size', 'Integer', default_value, comment)

    comment = 'Path for classify database'
    default_value = File.join($FORMATTED_DB_PATH, 'classify.fasta')
    params.check_param(errors, 'classify_db', 'DB', default_value, comment)

    errors
  end

end
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.49'
33
+ VERSION = '2.0.50'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.49
5
+ version: 2.0.50
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-05-24 00:00:00 Z
13
+ date: 2012-06-13 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -201,6 +201,7 @@ files:
201
201
  - lib/seqtrimnext/actions/action_ab_adapter.rb
202
202
  - lib/seqtrimnext/actions/action_ab_far_adapter.rb
203
203
  - lib/seqtrimnext/actions/action_ab_left_adapter.rb
204
+ - lib/seqtrimnext/actions/action_classify.rb
204
205
  - lib/seqtrimnext/actions/action_empty_insert.rb
205
206
  - lib/seqtrimnext/actions/action_ignore_repeated.rb
206
207
  - lib/seqtrimnext/actions/action_indetermination.rb
@@ -254,6 +255,7 @@ files:
254
255
  - lib/seqtrimnext/plugins/plugin_adapters_old.rb
255
256
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
256
257
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
258
+ - lib/seqtrimnext/plugins/plugin_classify.rb
257
259
  - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
258
260
  - lib/seqtrimnext/plugins/plugin_find_poly_at.rb
259
261
  - lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -319,7 +321,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
321
  requirements: []
320
322
 
321
323
  rubyforge_project: seqtrimnext
322
- rubygems_version: 1.7.2
324
+ rubygems_version: 1.8.24
323
325
  signing_key:
324
326
  specification_version: 3
325
327
  summary: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data