seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
#########################################
|
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
|
3
|
+
# This class provides the methods to read the parameters file and to build the structure that stores each parameter's name and its value
|
|
4
|
+
#########################################
|
|
5
|
+
require 'scbi_fasta'
|
|
6
|
+
|
|
7
|
+
class Params
|
|
8
|
+
|
|
9
|
+
#Creates the structure and start the reading of parameter's file
|
|
10
|
+
# Builds an empty parameter store and immediately loads the file at +path+.
#
# path:: path of the parameters file, handed as-is to read_file
def initialize(path)
  # containers filled later by read_file / set_param / the load_* methods
  @params, @comments = {}, {}
  @mids, @linkers, @clusters = {}, {}, {}

  # comments grouped by plugin name, then by parameter name
  @plugin_comments = {}

  read_file(path)
end
|
|
22
|
+
|
|
23
|
+
# Reads param's file
|
|
24
|
+
# Reads the parameters file and stores every "name = value" pair.
#
# Lines whose first non-blank character is '#' are collected (with the leading
# '#' removed) and handed to set_param together with the next parameter that
# follows them. Empty lines and lines without a value are ignored.
# Nothing is read when +path_file+ is nil or does not exist. When the file
# exists but no parameter ends up stored, a message is printed and the
# process exits.
#
# path_file:: path of the parameters file (may be nil)
def read_file(path_file)
  return unless path_file && File.exist?(path_file)

  comments = []

  # File.foreach closes the file for us, even when set_param raises
  File.foreach(path_file) do |raw_line|
    line = raw_line.chomp
    next if line.empty?

    if line =~ /^\s*#/
      comments << line.gsub(/^\s*#/, '')
      next
    end

    # split on the first '=' only, so that values may contain '=' themselves
    name, value = line.split(/\s*=\s*/, 2)
    next if name.nil? || value.nil? || value.empty?

    set_param(name.strip, value.strip, comments)
    comments = []
  end

  if @params.empty?
    puts "INVALID PARAMETER FILE: #{path_file}. No parameters defined"
    exit
  end
end
|
|
56
|
+
|
|
57
|
+
# Load mid's file
|
|
58
|
+
# Loads the entries of a FASTA file into @mids.
#
# For every pair yielded by FastaFile#each the second value is stored under
# the first one (presumably name => sequence; confirm against FastaFile).
# Nothing happens when the file does not exist.
#
# path_file:: path of the FASTA file with the MIDs
def load_mids(path_file)
  return unless File.exist?(path_file)

  ff = FastaFile.new(path_file)
  begin
    ff.each { |n, f| @mids[n] = f }
  ensure
    # release the reader even when the iteration fails
    ff.close
  end
end
|
|
69
|
+
|
|
70
|
+
# Load linker's file
|
|
71
|
+
# Loads the entries of a FASTA file into @linkers.
#
# For every pair yielded by FastaFile#each the second value is stored under
# the first one (presumably name => sequence; confirm against FastaFile).
# Nothing happens when the file does not exist.
#
# path_file:: path of the FASTA file with the linkers
def load_linkers(path_file)
  return unless File.exist?(path_file)

  ff = FastaFile.new(path_file)
  begin
    ff.each { |n, f| @linkers[n] = f }
  ensure
    # release the reader even when the iteration fails
    ff.close
  end
end
|
|
80
|
+
|
|
81
|
+
# Rebuilds @clusters from a clustering file.
#
# Lines starting with '>' or '0' are skipped. From every other line the text
# found between '>' and '... ' is taken as a sequence name and registered in
# @clusters with value 1. When the file does not exist an error naming the
# missing path is logged and @clusters is left empty.
#
# file_path:: path of the clustering file
def load_repeated_seqs(file_path)
  @clusters = {}

  unless File.exist?(file_path)
    # report the path that was not found (the cluster count is always 0 here)
    $LOG.error("Clustering file's doesn't exists: #{file_path}")
    return
  end

  $LOG.debug("Repeated file path:" + file_path)

  # File.foreach closes the file when done
  File.foreach(file_path) do |line|
    # in ruby 1.9+ line[0] gives the character, not its code
    next if line[0] == '>'[0] || line[0] == '0'[0]

    @clusters[Regexp.last_match(1)] = 1 if line =~ />([^\.]+)\.\.\.\s/
  end

  $LOG.info("Repeated sequence count: #{@clusters.count}")
end
|
|
107
|
+
|
|
108
|
+
# Tells whether +name+ is registered in @clusters.
#
# name:: sequence name to look up
# Returns true when @clusters holds a non-nil entry for +name+, false otherwise.
def repeated_seq?(name)
  entry = @clusters[name]
  !entry.nil?
end
|
|
111
|
+
|
|
112
|
+
# Saves the parameters, grouped by plugin, to a file
|
|
113
|
+
# Writes all registered parameters to +path_file+, grouped by plugin.
#
# Plugins are written in reverse alphabetical order. Each plugin gets a
# banner with its name, followed by its parameters: first the comment lines
# (each prefixed with '# '), then "name = value".
#
# path_file:: path of the file to write (overwritten if it exists)
def save_file(path_file)
  banner = "#" * 50

  # block form: the file is closed even when writing fails half-way
  File.open(path_file, 'w') do |f|
    @plugin_comments.keys.sort.reverse.each do |plugin_name|
      f.puts banner
      f.puts "# " + plugin_name
      f.puts banner
      f.puts ''

      @plugin_comments[plugin_name].keys.each do |param|
        comment = get_comment(plugin_name, param)
        # a comment may be nil, '' or an array of lines; only non-empty ones are written
        unless comment.nil? || comment.empty?
          f.puts comment.map { |c| '# ' + c if c != '' }
        end
        f.puts ''
        f.puts "#{param} = #{@params[param]}"
        f.puts ''
      end
    end
  end
end
|
|
135
|
+
|
|
136
|
+
# Prints the pair name/numeric-value for every parameter
|
|
137
|
+
# Prints one "name = value " line to standard output for every stored parameter.
def print_parameters
  @params.each_pair { |name, value| puts "#{name} = #{value} " }
end
|
|
143
|
+
|
|
144
|
+
# Return the value stored for the given parameter
|
|
145
|
+
# Looks up a parameter.
#
# param:: name of the parameter
# Returns the stored value, or nil when the parameter is not defined.
def get_param(param)
  @params[param]
end
|
|
149
|
+
|
|
150
|
+
# Return the mid stored under the given name
|
|
151
|
+
# Looks up a MID.
#
# param:: name under which the MID was stored
# Returns the stored entry, or nil when there is none.
def get_mid(param)
  @mids[param]
end
|
|
154
|
+
|
|
155
|
+
# Return the linker of param
|
|
156
|
+
# Looks up a linker.
#
# linker:: name under which the linker was stored
# Returns the stored entry, or nil when there is none.
def get_linker(linker)
  @linkers[linker]
end
|
|
159
|
+
|
|
160
|
+
# Derives a plugin name from the call stack.
#
# Takes the second entry of caller(2) and returns the base name (without
# extension) of the file named in that stack entry. When there is no such
# entry, or it cannot be parsed, 'General' is returned.
#
# NOTE: the result depends on the call depth, so this method has to be
# called directly from the method whose caller is of interest.
def get_plugin
  # caller(2) is nil when the stack is shallower than two frames
  at = (caller(2) || [])[1]

  return 'General' unless /^(.+?):(\d+)(?::in `(.*)')?/ =~ at

  file = Regexp.last_match(1)
  File.basename(file, File.extname(file))
end
|
|
176
|
+
|
|
177
|
+
# Stores a parameter value and registers its comment under the calling plugin.
#
# param::   name of the parameter
# value::   value to store (replaces any previous value)
# comment:: optional comment; only used when the parameter has no comment yet
#           under the plugin returned by get_plugin
def set_param(param, value, comment = nil)
  # get_plugin inspects the call stack, so it must be called from here directly
  owner = get_plugin

  @params[param] = value

  # keep an already registered comment; only fill in a missing one
  set_comment(owner, param, comment) if get_comment(owner, param).nil?
end
|
|
188
|
+
|
|
189
|
+
# def set_order(plugin,param)
|
|
190
|
+
#
|
|
191
|
+
# if @param_order[plugin].nil?
|
|
192
|
+
# @param_order[plugin]=[]
|
|
193
|
+
# end
|
|
194
|
+
#
|
|
195
|
+
# if !@param_order[plugin].index(param)
|
|
196
|
+
# @param_order[plugin].push param
|
|
197
|
+
# end
|
|
198
|
+
# end
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# Looks up the comment registered for a parameter under a plugin.
#
# plugin:: plugin name
# param::  parameter name
# Returns the stored comment, or nil when the plugin or the parameter is unknown.
def get_comment(plugin, param)
  by_param = @plugin_comments[plugin]
  by_param ? by_param[param] : nil
end
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Registers the comment of a parameter under a plugin.
#
# A parameter is kept under a single plugin: it is removed from every other
# plugin, and when +comment+ is nil the comment found there (or '' if none)
# is reused. Plugins left without parameters are dropped.
#
# plugin::  plugin name that will own the parameter
# param::   parameter name
# comment:: nil, an array of lines, or a string (split into stripped lines)
def set_comment(plugin, param, comment)
  # normalise a multi-line string into an array of stripped lines
  unless comment.nil? || comment.is_a?(Array)
    comment = comment.split("\n").compact.map { |text| text.strip }
  end

  @plugin_comments[plugin] = {} if @plugin_comments[plugin].nil?

  # detach the parameter from any other plugin, remembering the last comment seen
  inherited = ''
  @plugin_comments.each do |other_plugin, entries|
    next if other_plugin == plugin || !entries.key?(param)

    inherited = entries[param]
    entries.delete(param)
  end

  comment = inherited if comment.nil?
  @plugin_comments[plugin][param] = (comment || [''])

  # remove plugins that no longer own any parameter
  @plugin_comments.delete_if { |_name, entries| entries.empty? }
end
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# Stores a MID.
#
# param:: name under which the MID is stored
# value:: entry to store (replaces any previous one)
# Returns +value+.
def set_mid(param, value)
  @mids.store(param, value)
end
|
|
250
|
+
#attr_accessor :h # to accede to the atribute 'h' from out of this class
|
|
251
|
+
|
|
252
|
+
# Returns true if the parameter exists and false if it doesn't
|
|
253
|
+
# Tells whether a parameter is defined.
#
# param_name:: name of the parameter
# Returns true when a non-nil value is stored for it, false otherwise.
def exists?(param_name)
  stored = @params[param_name]
  !stored.nil?
end
|
|
256
|
+
|
|
257
|
+
# Normalises a comma-separated plugin list parameter.
#
# Every entry is stripped and 'PluginExtractInserts' is forced to be the
# last entry (any earlier occurrence is removed). The normalised list is
# stored back with set_param.
#
# errors::     error list of the caller (not modified here)
# param_name:: name of the parameter holding the plugin list
def check_plugin_list_param(errors, param_name)
  names = get_param(param_name).split(',').map { |entry| entry.strip }

  # PluginExtractInserts always goes at the end
  names = names.reject { |entry| entry == 'PluginExtractInserts' }
  names.push('PluginExtractInserts')

  set_param(param_name, names.join(','))
end
|
|
282
|
+
|
|
283
|
+
# def split_databases(db_param_name)
|
|
284
|
+
# Validates the databases listed in a parameter and formats them if needed.
#
# The parameter value is a whitespace-separated list of database names,
# optionally wrapped in double quotes. A name that is not an existing path is
# looked up inside $FORMATTED_DB_PATH. For every database:
# * the file must exist, otherwise an error is raised;
# * when no "<path>*.n*" file is found, makeblastdb is run on it, provided
#   the file is writable (otherwise an error is raised).
# Finally the list is stored back, quoted, with set_param.
#
# errors::        error list of the caller (not modified here)
# db_param_name:: name of the parameter holding the database list
# Raises RuntimeError when a database is missing or cannot be formatted.
def check_db_param(errors, db_param_name)
  dbs = get_param(db_param_name).gsub('"', '').split(/\s+/)
  puts dbs.join(',')

  # TODO - also check here that the db is not empty and that it is formatted.
  dbs.reverse_each do |db|
    path = File.exist?(db) ? db : File.join($FORMATTED_DB_PATH, db)

    # check existence first, so a missing file is not reported as a permission problem
    raise "DB File #{path} does not exists" unless File.exist?(path)

    next unless Dir.glob(path + '*.n*').empty?

    puts "DB file #{path} not formatted"

    unless File.writable_real?(path)
      raise "Can't format database. We don't have write permissions in: #{path}"
    end

    # argument-list form: the path is never interpreted by a shell
    system('makeblastdb', '-in', path, '-parse_seqids', '-dbtype', 'nucl')
  end

  set_param(db_param_name, '"' + dbs.join(' ') + '"')
end
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# Creates a sample parameters file in the current directory.
#
# The file is named sample_params.txt; when that name is taken,
# sample_params1.txt, sample_params2.txt, ... are tried until a free name is
# found. The chosen name is printed to standard output.
def self.generate_sample_params
  filename = 'sample_params.txt'
  suffix = 1
  while File.exist?(filename)
    filename = "sample_params#{suffix}.txt"
    suffix += 1
  end

  # block form closes the file even when writing fails
  File.open(filename, 'w') { |f| f.puts "SAMPLE_PARAMS" }

  puts "Sample params file generated: #{filename}"
end
|
|
337
|
+
|
|
338
|
+
# Validates one parameter, applying its default value when it is missing.
#
# errors::        array that receives a message for every problem found
# param::         name of the parameter
# param_class::   expected kind: 'Integer', 'Float', 'String', 'DB' or 'PluginList'
# default_value:: value stored when the parameter is missing (nil = required)
# comment::       comment registered for the parameter under the calling plugin
#
# A missing required parameter is reported once and not validated further.
# 'DB' values are additionally checked with check_db_param and 'PluginList'
# values with check_plugin_list_param.
def check_param(errors, param, param_class, default_value = nil, comment = nil)
  missing = false

  unless exists?(param)
    if default_value.nil?
      errors.push "The param #{param} is required and no default value is available"
      missing = true
    else
      set_param(param, default_value, comment)
    end
  end

  s = get_param(param)

  # get_plugin inspects the call stack, so it has to be called from here directly
  set_comment(get_plugin, param, comment)

  # already reported above; validating nil would only add a second, misleading error
  return if missing

  begin
    case param_class
    when 'Integer'
      Integer(s)
    when 'Float'
      Float(s)
    when 'String'
      String(s)
    when 'DB'
      # it is a string ...
      String(s)
      # ... and must be a valid db
      check_db_param(errors, param)
    when 'PluginList'
      String(s)
      check_plugin_list_param(errors, param)
    end
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed
    message = "Current value is ##{s}#. "
    message += e.message if param_class == 'DB'

    errors.push "Param #{param} is not a valid #{param_class}. #{message}"
  end
end
|
|
386
|
+
|
|
387
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# require '../utils/fasta_qual_reader' #descomentar en test_extracts
|
|
2
|
+
require 'fasta_qual_reader' #descomentar en seqtrimii
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
require 'make_blast_db'
|
|
7
|
+
require 'scbi_blast'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
######################################
|
|
13
|
+
# Author:: Almudena Bocinos Rioboo
|
|
14
|
+
# Extract stats like mean of sequence's length
|
|
15
|
+
# Inherit:: FastaQualReader
|
|
16
|
+
######################################
|
|
17
|
+
|
|
18
|
+
class Piro < FastaQualReader
|
|
19
|
+
#attr_accessor :na
|
|
20
|
+
# Sets up the reader and builds the blast database.
#
# path_fasta:: path of the FASTA file; kept in @path_fasta for later use
# path_qual::  path of the quality file
def initialize(path_fasta, path_qual)
  # stored before calling the parent constructor, as in the original order
  @path_fasta = path_fasta

  # bare super forwards path_fasta and path_qual unchanged
  super

  MakeBlastDb.execute('../sequences/gemini.fasta')
end
|
|
27
|
+
|
|
28
|
+
# Runs one sequence through blast, using @path_fasta as database.
#
# name_seq::  name of the sequence (only printed)
# fasta_seq:: sequence handed to BatchBlast#do_blast
# qual_seq::  qualities of the sequence (not used here)
# Returns whatever BatchBlast#do_blast returns.
def on_process_sequence(name_seq, fasta_seq, qual_seq)
  puts "in piro, in on process sequence, #{name_seq}"

  blast_options = ' -task blastn -evalue 1e-10 -perc_identity 95'
  blast = BatchBlast.new('-db ' + @path_fasta, 'blastn', blast_options) # get contaminants
  # blast = BatchBlast.new('DB/vectors.fasta','blastn',' -task blastn ') #get vectors

  $LOG.debug "-------OK----"

  # run the sequence against the database; the raw result is the return value
  blast.do_blast(fasta_seq)

  # Disabled post-processing of the blast result, kept for reference:
  #
  # blast_table_results = BlastTableResult.new(res,nil)
  #
  # vectors=[]
  # blast_table_results.querys.each do |query| # first round to save contaminants without overlap
  #   merge_hits(query.hits,vectors)
  # end
  #
  # begin
  #   vectors2=vectors # second round to save contaminants without overlap
  #   vectors = []
  #   merge_hits(vectors2,vectors)
  # end until (vectors2.count == vectors.count)
  #
  # vectors.each do |c| # adds the correspondent action to the sequence
  #   #if @seq_specie!=seq_specie-contaminant
  #   if (@params.get_param('genus')!=c.subject_id.split('_')[1])
  #     # puts "DIFFERENT SPECIE #{specie} ,#{hit.subject_id.split('_')[1].to_s}"
  #     a = seq.add_action(c.q_beg,c.q_end,type)
  #     a.message = c.subject_id
  #   end
  # end
end
|
|
66
|
+
|
|
67
|
+
# Hook with an intentionally empty body: nothing is done here.
def on_end_process
end
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#########################################
|
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
|
3
|
+
# This class provided the methods to manage the execution of the plugins
|
|
4
|
+
#########################################
|
|
5
|
+
|
|
6
|
+
require 'json'
|
|
7
|
+
|
|
8
|
+
require 'sequence_with_action'
|
|
9
|
+
require 'sequence_group'
|
|
10
|
+
|
|
11
|
+
class PluginManager
|
|
12
|
+
attr_accessor :plugin_names
|
|
13
|
+
|
|
14
|
+
#Storages the necessary plugins specified in 'plugin_list' and start the loading of plugins
|
|
15
|
+
# Stores the plugins named in +plugin_list+ and requires their files.
#
# plugin_list:: comma-separated plugin class names; blanks around the names
#               and empty entries are discarded
# params::      parameters object handed to every plugin
def initialize(plugin_list, params)
  @params = params
  @plugin_names = plugin_list.strip.split(',').map { |entry| entry.strip }.reject { |entry| entry.empty? }

  load_plugins_from_files
end
|
|
23
|
+
|
|
24
|
+
# Receives the plugin's list , and create an instance from its respective class (it's that have the same name)
|
|
25
|
+
# Runs every plugin of @plugin_names, in order, over the given sequences.
#
# Before each plugin, sequences that are rejected or whose fasta is empty are
# taken out of +running_seqs+ and set aside; execution stops early when no
# sequence is left. Each plugin is instantiated with the remaining sequences
# and @params, and its stats are stored in running_seqs.stats under the
# plugin name. At the end the set-aside sequences are added back.
#
# running_seqs:: group of sequences to process (modified in place)
# Raises RuntimeError when the plugin list is empty.
def execute_plugins(running_seqs)
  raise "Plugin list not found" if @plugin_names.empty?

  # sequences taken out of the execution list
  rejected_seqs = []

  @plugin_names.each do |plugin_name|
    # walk backwards, since elements are deleted while iterating
    running_seqs.reverse_each do |seq|
      next unless seq.seq_rejected || seq.seq_fasta.empty?

      running_seqs.delete(seq)
      rejected_seqs.push seq
    end

    break if running_seqs.empty?

    # the plugin class has the same name as the list entry
    plugin_run = Object.const_get(plugin_name).new(running_seqs, @params)
    running_seqs.stats[plugin_name] = plugin_run.stats
  end

  running_seqs.add(rejected_seqs)
end
|
|
74
|
+
|
|
75
|
+
# Checks if the parameters are right for all plugins's execution. Finally return true if all is right or false if isn't
|
|
76
|
+
# Asks every plugin of @plugin_names to validate the parameters.
#
# A name whose class does not descend from Plugin counts as an error. All
# problems are written to $LOG.error.
#
# params:: parameters object handed to each plugin's check_params
# Returns true when no plugin reported errors; false otherwise, or when the
# plugin list is empty.
def check_plugins_params(params)
  if @plugin_names.empty?
    $LOG.error "No plugin list provided"
    return false
  end

  all_ok = true

  @plugin_names.each do |plugin_name|
    plugin_class = Object.const_get(plugin_name)

    # only real plugins are asked; anything else is reported as invalid
    problems = if plugin_class.ancestors.include?(Plugin)
                 plugin_class.check_params(params)
               else
                 [plugin_name + ' is not a valid plugin']
               end
    next if problems.empty?

    $LOG.error plugin_name + ' found following errors:'
    problems.each { |problem| $LOG.error ' -' + problem }
    all_ok = false
  end

  all_ok
end
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Requires the file of every plugin listed in @plugin_names
|
|
114
|
+
# Requires the file of every plugin named in @plugin_names.
# The file name is obtained from the plugin name with String#decamelize.
def load_plugins_from_files
  @plugin_names.each { |plugin_name| require plugin_name.decamelize }
end
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Iterates by the files from the folder 'plugins', and load it
|
|
128
|
+
# Old loader: requires every .rb file found in the "../plugins" folder
# (relative to this file).
#
# Raises RuntimeError when that folder does not exist.
def load_plugins_from_files_old
  ignore = ['.', '..', 'plugin.rb']

  plugins_path = File.expand_path(File.join(File.dirname(__FILE__), "../plugins"))
  raise "Plugin folder does not exists" unless File.exist?(plugins_path)

  Dir.glob(File.join(plugins_path, '*.rb')).each do |plugin|
    # NOTE(review): glob returns full paths, so this comparison against bare
    # file names never matches and plugin.rb is required as well - confirm
    # whether that is intended before changing it.
    next if ignore.include?(plugin)

    require plugin
  end
end
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
end
|