seqtrimnext 2.0.49 → 2.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +2 -0
- data/lib/seqtrimnext/actions/action_classify.rb +23 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +2 -0
- data/lib/seqtrimnext/plugins/plugin.rb +0 -1
- data/lib/seqtrimnext/plugins/plugin_classify.rb +143 -0
- data/lib/seqtrimnext.rb +1 -1
- metadata +5 -3
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -24,6 +24,7 @@ History.txt
|
|
24
24
|
lib/seqtrimnext/actions/action_ab_adapter.rb
|
25
25
|
lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
26
26
|
lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
27
|
+
lib/seqtrimnext/actions/action_classify.rb
|
27
28
|
lib/seqtrimnext/actions/action_empty_insert.rb
|
28
29
|
lib/seqtrimnext/actions/action_ignore_repeated.rb
|
29
30
|
lib/seqtrimnext/actions/action_indetermination.rb
|
@@ -77,6 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
|
|
77
78
|
lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
78
79
|
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
79
80
|
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
81
|
+
lib/seqtrimnext/plugins/plugin_classify.rb
|
80
82
|
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
81
83
|
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
82
84
|
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the action that the classify plugin attaches to a sequence
|
7
|
+
# Inherit: SeqtrimAction
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
# Action created by the classify plugin (PluginClassify requests it by the
# type name "ActionClassify").
class ActionClassify < SeqtrimAction

  # Builds the action over the given range.
  #
  # start_pos, end_pos are forwarded untouched to SeqtrimAction#initialize.
  def initialize(start_pos, end_pos)
    # Bare `super` forwards exactly the arguments this method received.
    super
    # NOTE(review): presumably marks the action as one that does not trim the
    # sequence — confirm against how SeqtrimAction uses @cut.
    @cut = false
  end

  # Returns the decorated form of +char+ used when displaying this action.
  # Relies on +char+ responding to #yellow (presumably a String colouring
  # extension loaded elsewhere in the project — confirm).
  def apply_decoration(char)
    char.yellow
  end

end
|
@@ -203,6 +203,8 @@ class Seqtrim
|
|
203
203
|
format = :sanger
|
204
204
|
end
|
205
205
|
|
206
|
+
$LOG.info("Used FastQ format for input files: #{format}")
|
207
|
+
|
206
208
|
sequence_reader = FastqFile.new(seqs_path,'r',format, true)
|
207
209
|
# cd_hit_input_file = 'cd-hit-input.fasta'
|
208
210
|
cd_hit_input_file = seqs_path
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
require "make_blast_db"
|
4
|
+
########################################################
|
5
|
+
# Author: Almudena Bocinos Rioboo
|
6
|
+
#
|
7
|
+
# Defines the main methods that are necessary to execute PluginClassify
|
8
|
+
# Inherit: Plugin
|
9
|
+
########################################################
|
10
|
+
|
11
|
+
# Plugin that BLASTs every sequence against the classify database and tags the
# sequence with the database entry that accumulates the largest hit size.
class PluginClassify < Plugin

  # Maximum number of different aligned sequences to keep from the BLAST database.
  # NOTE(review): this constant is not referenced by the command built in
  # #do_blasts (no -max_target_seqs option is passed there) — confirm whether
  # it should be.
  MAX_TARGETS_SEQS = 4

  # Tells whether hit +c+ lies close to either end of +seq+.
  #
  # c             - hit responding to #q_beg and #q_end.
  # seq           - sequence responding to #seq_fasta.
  # min_cont_size - size whose half is used as the allowed distance to an end.
  #
  # Returns true when the hit starts less than half of +min_cont_size+ from the
  # beginning, or ends within that distance of the last position of the sequence.
  def near_to_extrem(c, seq, min_cont_size)
    max_to_extreme = (min_cont_size / 2).to_i

    (c.q_beg - max_to_extreme < 0) ||
      ((c.q_end + max_to_extreme) >= seq.seq_fasta.size - 1)
  end

  # Adds up the query-side size of the hits, grouped by hit definition.
  #
  # hits - collection of hits responding to #q_beg, #q_end and #definition.
  #
  # Returns a Hash mapping each definition to the sum of (q_end - q_beg + 1)
  # over all of its hits.
  def sum_hits_by_id(hits)
    totals = {}

    hits.each do |hit|
      hit_size = hit.q_end - hit.q_beg + 1
      totals[hit.definition] = (totals[hit.definition] || 0) + hit_size
    end

    # Diagnostic output goes to the log instead of stdout (the previous
    # `puts res.to_json` printed once per sequence and needed json loaded).
    $LOG.debug("[#{self.class}]: hit size by definition: #{totals.inspect}") if $LOG

    totals
  end

  # Runs the plugin over a batch of sequences: one BLAST run for the whole
  # batch, then each sequence is processed with the query result found at the
  # same index.
  def execute(seqs)
    blasts = do_blasts(seqs)

    seqs.each_with_index do |s, i|
      exec_seq(s, blasts.querys[i])
    end
  end

  # Builds the blastn command from the plugin parameters and runs it once for
  # all +seqs+, passed as alternating ">name" / sequence lines.
  #
  # Returns whatever BatchBlast#do_blast returns for the :xml format.
  def do_blasts(seqs)
    # TODO - (translated) Culling limit = 2 because blast fails with this
    # command when it is given cl=1 and dust=no and a low-complexity sequence
    # as input.
    # NOTE(review): the command below passes -culling_limit 1, which does not
    # match the TODO above — confirm which value is intended.
    blast = BatchBlast.new(
      "-db #{@params.get_param('classify_db')}",
      'blastn',
      " -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1"
    )

    $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))

    fastas = []
    seqs.each do |seq|
      fastas.push(">" + seq.seq_name)
      fastas.push(seq.seq_fasta)
    end

    blast.do_blast(fastas, :xml)
  end

  # Classifies a single sequence from its BLAST query result.
  #
  # seq         - the sequence being processed.
  # blast_query - the BLAST result for that sequence; only #hits is read.
  #
  # When there is at least one hit, the definition with the largest summed hit
  # size becomes the message and tag of a new "ActionClassify" action, the tag
  # is registered through seq.add_file_tag, and the stats are updated.
  # Returns nil when there are no hits.
  def exec_seq(seq, blast_query)
    # The check that blast_query.query_id matches seq.seq_name is disabled
    # (its raise was commented out), so no comparison is made here.

    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"

    # NOTE(review): the 'min_classify_hit_size' parameter declared in
    # check_params is not applied anywhere in this method — confirm whether
    # small hits should be filtered out.
    ranked = sum_hits_by_id(blast_query.hits).sort { |c1, c2| c1[1] <=> c2[1] }
    return if ranked.empty?

    # Ascending order: the last pair carries the largest summed hit size.
    definition, classify_size = ranked.last

    a = seq.new_action(-1, -1, "ActionClassify") # adds the corresponding action to the sequence
    a.message = definition
    a.tag_id = definition.gsub(' ', '_')

    # NOTE(review): presumably routes the sequence to an output file named
    # after the tag — confirm against add_file_tag.
    seq.add_file_tag(1, a.tag_id, :file)

    add_stats('classify_size', classify_size)
    add_stats('classify_ids', definition)

    seq.add_actions([a])
  end

  # Declares the parameters used by this plugin, with their type, default
  # value and description.
  #
  # Returns the errors array that was passed to every params.check_param call.
  def self.check_params(params)
    errors = []

    comment = 'Blast E-value used as cut-off when searching for contaminations'
    default_value = 1e-10
    params.check_param(errors, 'blast_evalue_classify', 'Float', default_value, comment)

    comment = 'Minimum required identity (%) for a reliable classify'
    default_value = 85
    params.check_param(errors, 'blast_percent_classify', 'Integer', default_value, comment)

    comment = 'Minimum hit size (nt) for considering to classify'
    default_value = 30 # was 40
    params.check_param(errors, 'min_classify_hit_size', 'Integer', default_value, comment)

    comment = 'Path for classify database'
    default_value = File.join($FORMATTED_DB_PATH, 'classify.fasta')
    params.check_param(errors, 'classify_db', 'DB', default_value, comment)

    errors
  end

end
|
data/lib/seqtrimnext.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.
|
5
|
+
version: 2.0.50
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-06-13 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -201,6 +201,7 @@ files:
|
|
201
201
|
- lib/seqtrimnext/actions/action_ab_adapter.rb
|
202
202
|
- lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
203
203
|
- lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
204
|
+
- lib/seqtrimnext/actions/action_classify.rb
|
204
205
|
- lib/seqtrimnext/actions/action_empty_insert.rb
|
205
206
|
- lib/seqtrimnext/actions/action_ignore_repeated.rb
|
206
207
|
- lib/seqtrimnext/actions/action_indetermination.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
255
256
|
- lib/seqtrimnext/plugins/plugin_amplicons.rb
|
256
257
|
- lib/seqtrimnext/plugins/plugin_contaminants.rb
|
258
|
+
- lib/seqtrimnext/plugins/plugin_classify.rb
|
257
259
|
- lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
258
260
|
- lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
259
261
|
- lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -319,7 +321,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
319
321
|
requirements: []
|
320
322
|
|
321
323
|
rubyforge_project: seqtrimnext
|
322
|
-
rubygems_version: 1.
|
324
|
+
rubygems_version: 1.8.24
|
323
325
|
signing_key:
|
324
326
|
specification_version: 3
|
325
327
|
summary: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data
|