seqtrimnext 2.0.49 → 2.0.50
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +2 -0
- data/lib/seqtrimnext/actions/action_classify.rb +23 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +2 -0
- data/lib/seqtrimnext/plugins/plugin.rb +0 -1
- data/lib/seqtrimnext/plugins/plugin_classify.rb +143 -0
- data/lib/seqtrimnext.rb +1 -1
- metadata +5 -3
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -24,6 +24,7 @@ History.txt
|
|
24
24
|
lib/seqtrimnext/actions/action_ab_adapter.rb
|
25
25
|
lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
26
26
|
lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
27
|
+
lib/seqtrimnext/actions/action_classify.rb
|
27
28
|
lib/seqtrimnext/actions/action_empty_insert.rb
|
28
29
|
lib/seqtrimnext/actions/action_ignore_repeated.rb
|
29
30
|
lib/seqtrimnext/actions/action_indetermination.rb
|
@@ -77,6 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
|
|
77
78
|
lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
78
79
|
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
79
80
|
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
81
|
+
lib/seqtrimnext/plugins/plugin_classify.rb
|
80
82
|
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
81
83
|
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
82
84
|
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute ActionClassify
|
7
|
+
# Inherit: SeqtrimAction
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
# Action recorded on a sequence by the classify plugin (PluginClassify requests
# it through seq.new_action with the type name "ActionClassify").
class ActionClassify < SeqtrimAction
  # start_pos, end_pos:: forwarded unchanged to SeqtrimAction#initialize
  def initialize(start_pos, end_pos)
    super
    # NOTE(review): presumably marks that this action does not trim the
    # sequence - confirm against SeqtrimAction
    @cut = false
  end

  # Returns +char+ decorated through its +yellow+ method.
  # NOTE(review): +yellow+ is not defined here; presumably a String colouring
  # extension loaded elsewhere - confirm
  def apply_decoration(char)
    char.yellow
  end
end
|
@@ -203,6 +203,8 @@ class Seqtrim
|
|
203
203
|
format = :sanger
|
204
204
|
end
|
205
205
|
|
206
|
+
$LOG.info("Used FastQ format for input files: #{format}")
|
207
|
+
|
206
208
|
sequence_reader = FastqFile.new(seqs_path,'r',format, true)
|
207
209
|
# cd_hit_input_file = 'cd-hit-input.fasta'
|
208
210
|
cd_hit_input_file = seqs_path
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
require "make_blast_db"
|
4
|
+
########################################################
|
5
|
+
# Author: Almudena Bocinos Rioboo
|
6
|
+
#
|
7
|
+
# Defines the main methods that are necessary to execute PluginClassify
|
8
|
+
# Inherit: Plugin
|
9
|
+
########################################################
|
10
|
+
|
11
|
+
class PluginClassify < Plugin
|
12
|
+
|
13
|
+
|
14
|
+
MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
|
15
|
+
|
16
|
+
|
17
|
+
# Tells whether hit +c+ lies within half of +min_cont_size+ of either end of +seq+.
#
# c::             hit responding to +q_beg+ and +q_end+
# seq::           sequence responding to +seq_fasta+ (only its size is used)
# min_cont_size:: size whose half is used as the allowed distance to each end
#
# Returns true or false.
def near_to_extrem(c, seq, min_cont_size)
  margin = (min_cont_size / 2).to_i

  # close to the left end: the hit starts less than +margin+ positions in
  return true if c.q_beg - margin < 0

  # close to the right end: the hit end plus +margin+ reaches the last index
  c.q_end + margin >= seq.seq_fasta.size - 1
end
|
21
|
+
|
22
|
+
# Adds up the query-side size of the given hits, grouped by hit definition.
#
# hits:: collection of hits responding to +definition+, +q_beg+ and +q_end+
#
# Returns a Hash that maps each definition to the sum of (q_end - q_beg + 1)
# over all of its hits; an empty Hash when there are no hits.
def sum_hits_by_id(hits)
  totals = {}

  hits.each do |hit|
    # +1 because q_beg..q_end is counted as an inclusive range
    hit_size = hit.q_end - hit.q_beg + 1
    totals[hit.definition] = (totals[hit.definition] || 0) + hit_size
  end

  # The totals used to be dumped with a bare `puts ... to_json`, which wrote to
  # stdout on every call and needed the json library to be loaded. Keep the
  # information available, but only through the debug log.
  $LOG.debug("[#{self.class.to_s}]: hit sizes by definition: #{totals.inspect}") if $LOG

  totals
end
|
35
|
+
|
36
|
+
# Begins the plugin's execution: BLASTs every sequence in "seqs" and classifies each one from its BLAST query result
|
37
|
+
# Runs the plugin over a batch of sequences: one BLAST run for the whole batch,
# then one exec_seq call per sequence.
#
# seqs:: collection of sequences, passed as-is to do_blasts
#
# Returns the value of seqs.each_with_index.
def execute(seqs)
  blast_results = do_blasts(seqs)

  # the i-th entry of +querys+ is paired with the i-th sequence
  seqs.each_with_index { |sequence, index| exec_seq(sequence, blast_results.querys[index]) }
end
|
44
|
+
|
45
|
+
# Runs one blastn search for all the given sequences against the classify database.
#
# seqs:: collection of sequences responding to +seq_name+ and +seq_fasta+
#
# Returns whatever BatchBlast#do_blast returns for the :xml format (execute
# reads its +querys+).
def do_blasts(seqs)
  # TODO - Culling limit = 2 because blast fails with this command when it is given cl=1 and dust=no
  # and a low-complexity sequence as input
  # NOTE(review): the options below pass -culling_limit 1, not 2 - confirm which value is intended
  db_option = "-db #{@params.get_param('classify_db')}"
  search_options = " -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1"

  # -max_target_seqs #{MAX_TARGETS_SEQS} is deliberately not part of the options
  blast = BatchBlast.new(db_option, 'blastn', search_options)

  $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))

  # FASTA input as a flat list: header line, sequence line, header line, ...
  fastas = []
  seqs.each do |seq|
    fastas.concat([">"+seq.seq_name, seq.seq_fasta])
  end

  blast.do_blast(fastas, :xml)
end
|
66
|
+
|
67
|
+
|
68
|
+
# Classifies one sequence from its BLAST query result.
#
# The hits are summed per definition (sum_hits_by_id) and the definition with
# the largest total is attached to the sequence as an "ActionClassify" action,
# used as file tag, and recorded in the plugin stats.
#
# seq::         sequence; uses seq_name, new_action, add_file_tag and add_actions
# blast_query:: BLAST result for this sequence; uses query_id and hits
#
# Returns nil when there are no hits, otherwise the value of seq.add_actions.
#
# NOTE(review): check_params declares 'min_classify_hit_size', and the previous
# version of this method read it into a local that was never used. No minimum
# size is applied here - confirm whether a filter was intended.
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    # A mismatch is tolerated (the raise that used to be here was disabled),
    # but leave a trace so misaligned BLAST output can be diagnosed.
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: blast query id #{blast_query.query_id} does not match the sequence name"
  end

  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"

  # [definition, total_size] pairs in ascending order of total size
  ranked = sum_hits_by_id(blast_query.hits).sort { |c1, c2| c1[1] <=> c2[1] }
  return if ranked.empty?

  definition, classify_size = ranked.last

  # adds the correspondent action to the sequence
  a = seq.new_action(-1, -1, "ActionClassify")
  a.message = definition
  a.tag_id = definition.gsub(' ', '_')

  # save to this file
  seq.add_file_tag(1, a.tag_id, :file)

  add_stats('classify_size', classify_size)
  add_stats('classify_ids', definition)

  seq.add_actions([a])
end
|
116
|
+
|
117
|
+
# Returns an array with the errors caused by missing parameters
|
118
|
+
# Declares the parameters used by the classify plugin.
#
# params:: object responding to check_param(errors, name, type, default, comment)
#
# Calls params.check_param once per parameter, passing the same +errors+ array
# each time, and returns that array.
def self.check_params(params)
  errors = []

  # The description used to say "searching for contaminations", copied from
  # another plugin; it now describes this plugin's search.
  params.check_param(errors, 'blast_evalue_classify', 'Float', 1e-10,
                     'Blast E-value used as cut-off when searching for classification hits')

  params.check_param(errors, 'blast_percent_classify', 'Integer', 85,
                     'Minimum required identity (%) for a reliable classify')

  # default was 40 before
  params.check_param(errors, 'min_classify_hit_size', 'Integer', 30,
                     'Minimum hit size (nt) for considering to classify')

  params.check_param(errors, 'classify_db', 'DB', File.join($FORMATTED_DB_PATH, 'classify.fasta'),
                     'Path for classify database')

  errors
end
|
141
|
+
|
142
|
+
|
143
|
+
end
|
data/lib/seqtrimnext.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.49
|
5
|
+
version: 2.0.50
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-06-13 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -201,6 +201,7 @@ files:
|
|
201
201
|
- lib/seqtrimnext/actions/action_ab_adapter.rb
|
202
202
|
- lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
203
203
|
- lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
204
|
+
- lib/seqtrimnext/actions/action_classify.rb
|
204
205
|
- lib/seqtrimnext/actions/action_empty_insert.rb
|
205
206
|
- lib/seqtrimnext/actions/action_ignore_repeated.rb
|
206
207
|
- lib/seqtrimnext/actions/action_indetermination.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
255
256
|
- lib/seqtrimnext/plugins/plugin_amplicons.rb
|
256
257
|
- lib/seqtrimnext/plugins/plugin_contaminants.rb
|
258
|
+
- lib/seqtrimnext/plugins/plugin_classify.rb
|
257
259
|
- lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
258
260
|
- lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
259
261
|
- lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -319,7 +321,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
319
321
|
requirements: []
|
320
322
|
|
321
323
|
rubyforge_project: seqtrimnext
|
322
|
-
rubygems_version: 1.
|
324
|
+
rubygems_version: 1.8.24
|
323
325
|
signing_key:
|
324
326
|
specification_version: 3
|
325
327
|
summary: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data
|