seqtrimnext 2.0.62 → 2.0.66
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/resume_execution_times.rb +15 -6
- data/bin/resume_stn_contaminants.rb +37 -0
- data/bin/resume_stn_stats.rb +2 -1
- data/bin/seqtrimnext +8 -6
- data/bin/split_fastq.rb +1 -1
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +17 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +2 -2
- data/lib/seqtrimnext/classes/params.rb +14 -2
- data/lib/seqtrimnext/classes/seqtrim.rb +16 -8
- data/lib/seqtrimnext/plugins/plugin.rb +16 -5
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +29 -1
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +34 -5
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +2 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +4 -3
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +28 -4
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +12 -2
- data/lib/seqtrimnext/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab74e9c551c43eb30da4a881ab544c9d7754ecaa
|
4
|
+
data.tar.gz: acdf2c4e6d4e5d36d5fafbe1e2004178ed33b145
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d60d9cb31be6eab17ff47e81e77ae3246408dee62f4061e040a9efdcfc9e6f54997e873e244a337091be391a00ca05eec2cd8d5da7b6ecdebf468b76540b5a39
|
7
|
+
data.tar.gz: bf2c6174ca48e5ea0d1bf9b7da3e58b01d89016674fa7050473a6dca2dc105d6156281c080e25d5906384a4587b7a51571b51ed9155043e0dee71546fff5ecb7
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
if ARGV.count<1
|
6
|
-
puts "Usage: #{$0} [-t] [-j] stats1.json"
|
6
|
+
puts "Usage: #{$0} [-t] [-j] [-h] stats1.json"
|
7
7
|
exit -1
|
8
8
|
end
|
9
9
|
|
@@ -20,6 +20,15 @@ if ARGV[0]=='-j'
|
|
20
20
|
ARGV.shift
|
21
21
|
end
|
22
22
|
|
23
|
+
time_divider=1
|
24
|
+
# print header
|
25
|
+
if ARGV[0]=='-h'
|
26
|
+
time_divider=3600
|
27
|
+
puts "Times are in hours"
|
28
|
+
ARGV.shift
|
29
|
+
end
|
30
|
+
|
31
|
+
|
23
32
|
|
24
33
|
ARGV.each do |file_path|
|
25
34
|
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
@@ -34,7 +43,7 @@ ARGV.each do |file_path|
|
|
34
43
|
begin
|
35
44
|
stats.keys.each do |k|
|
36
45
|
if stats[k]['execution_time']
|
37
|
-
res[k]=stats[k]['execution_time']['total_seconds']
|
46
|
+
res[k]=stats[k]['execution_time']['total_seconds'].to_f/time_divider
|
38
47
|
total+=res[k]
|
39
48
|
end
|
40
49
|
end
|
@@ -48,10 +57,10 @@ ARGV.each do |file_path|
|
|
48
57
|
|
49
58
|
if stats['scbi_mapreduce']
|
50
59
|
res['TOTAL_workers']=stats['scbi_mapreduce']['connected_workers']
|
51
|
-
res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']
|
52
|
-
res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']
|
53
|
-
res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']
|
54
|
-
res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']
|
60
|
+
res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']/time_divider
|
61
|
+
res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']/time_divider
|
62
|
+
res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']/time_divider
|
63
|
+
res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']/time_divider
|
55
64
|
end
|
56
65
|
|
57
66
|
if puts_json
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
if ARGV.count<1
|
6
|
+
puts "Usage: #{$0} stats1.json [stats2.json stats3.json,...]"
|
7
|
+
exit -1
|
8
|
+
end
|
9
|
+
|
10
|
+
# print header
|
11
|
+
if ARGV[0]=='-t'
|
12
|
+
#heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
|
13
|
+
#puts heads.join("\t")
|
14
|
+
ARGV.shift
|
15
|
+
end
|
16
|
+
|
17
|
+
contaminants={}
|
18
|
+
|
19
|
+
ARGV.each do |file_path|
|
20
|
+
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
21
|
+
|
22
|
+
stats=JSON::parse(File.read(file_path))
|
23
|
+
|
24
|
+
res=[]
|
25
|
+
cont=stats['PluginContaminants']['contaminants_ids']
|
26
|
+
|
27
|
+
limit=60
|
28
|
+
cont.keys.sort{|c1,c2| cont[c2].to_i <=> cont[c1].to_i}.each do |k|
|
29
|
+
#puts "#{k} => #{cont[k]}"
|
30
|
+
contaminants[k]=(contaminants[k] || 0 ) + cont[k]
|
31
|
+
limit = limit -1
|
32
|
+
break if limit==0
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
puts JSON::pretty_generate(contaminants)
|
data/bin/resume_stn_stats.rb
CHANGED
@@ -10,10 +10,11 @@ end
|
|
10
10
|
# print header
|
11
11
|
if ARGV[0]=='-t'
|
12
12
|
heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
|
13
|
-
puts heads.join("\t")
|
13
|
+
puts heads.join("\t")
|
14
14
|
ARGV.shift
|
15
15
|
end
|
16
16
|
|
17
|
+
|
17
18
|
ARGV.each do |file_path|
|
18
19
|
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
19
20
|
|
data/bin/seqtrimnext
CHANGED
@@ -216,7 +216,7 @@ optparse = OptionParser.new do |opts|
|
|
216
216
|
options[:workers] = Integer(workers)
|
217
217
|
rescue
|
218
218
|
STDERR.puts "ERROR:Invalid workers parameter #{options[:workers]}"
|
219
|
-
exit
|
219
|
+
exit -1
|
220
220
|
end
|
221
221
|
|
222
222
|
end
|
@@ -268,7 +268,7 @@ optparse = OptionParser.new do |opts|
|
|
268
268
|
options[:format] = value
|
269
269
|
if !['sanger','illumina15', 'illumina18'].include?(value)
|
270
270
|
STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
|
271
|
-
exit
|
271
|
+
exit -1
|
272
272
|
end
|
273
273
|
end
|
274
274
|
|
@@ -301,7 +301,7 @@ optparse = OptionParser.new do |opts|
|
|
301
301
|
options[:template] = file
|
302
302
|
end
|
303
303
|
|
304
|
-
options[:chunk_size] =
|
304
|
+
options[:chunk_size] = 5000
|
305
305
|
opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
|
306
306
|
options[:chunk_size] = cs.to_i
|
307
307
|
end
|
@@ -332,7 +332,7 @@ optparse = OptionParser.new do |opts|
|
|
332
332
|
opts.on_tail( '-h', '--help', 'Display this screen' ) do
|
333
333
|
puts opts
|
334
334
|
show_additional_help
|
335
|
-
exit
|
335
|
+
exit -1
|
336
336
|
end
|
337
337
|
end
|
338
338
|
|
@@ -342,13 +342,13 @@ optparse.parse!
|
|
342
342
|
if options[:list_db] then
|
343
343
|
# List database entries in a database
|
344
344
|
ListDb.new($DB_PATH,options[:list_db_name])
|
345
|
-
exit
|
345
|
+
exit -1
|
346
346
|
end
|
347
347
|
|
348
348
|
if options[:gen_params] then
|
349
349
|
# Generates a sample params file in current directory
|
350
350
|
Params.generate_sample_params
|
351
|
-
exit
|
351
|
+
exit -1
|
352
352
|
end
|
353
353
|
|
354
354
|
#set logger
|
@@ -453,3 +453,5 @@ else
|
|
453
453
|
|
454
454
|
$LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
|
455
455
|
end
|
456
|
+
|
457
|
+
exit(Seqtrim.exit_status)
|
data/bin/split_fastq.rb
CHANGED
@@ -17,6 +17,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
17
17
|
@@full_stats={}
|
18
18
|
@@params= params
|
19
19
|
@@exit = false
|
20
|
+
@@exit_status=0
|
20
21
|
|
21
22
|
@@ongoing_stats={}
|
22
23
|
@@ongoing_stats[:sequence_count] = 0
|
@@ -85,6 +86,10 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
85
86
|
|
86
87
|
end
|
87
88
|
|
89
|
+
def self.exit_status
|
90
|
+
return @@exit_status
|
91
|
+
end
|
92
|
+
|
88
93
|
def self.end_work_manager
|
89
94
|
|
90
95
|
# if initial files doesn't exists, create it
|
@@ -115,6 +120,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
115
120
|
|
116
121
|
end
|
117
122
|
|
123
|
+
def self.global_error_received(error_exception)
|
124
|
+
$LOG.error "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
|
125
|
+
@@errors_file.puts "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
|
126
|
+
@@errors_file.puts "="*60
|
127
|
+
@@exit_status=-1
|
128
|
+
SeqtrimWorkManager.controlled_exit
|
129
|
+
end
|
130
|
+
|
118
131
|
def self.work_manager_finished
|
119
132
|
@@full_stats['scbi_mapreduce']=@@stats
|
120
133
|
|
@@ -129,10 +142,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
129
142
|
def error_received(worker_error, obj)
|
130
143
|
@@errors_file.puts "Error while processing object #{obj.inspect}\n" + worker_error.original_exception.message + ":\n" +worker_error.original_exception.backtrace.join("\n")
|
131
144
|
@@errors_file.puts "="*60
|
145
|
+
@@exit_status=-1
|
146
|
+
SeqtrimWorkManager.controlled_exit
|
147
|
+
|
132
148
|
end
|
133
149
|
|
134
150
|
def too_many_errors_received
|
135
151
|
$LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
|
152
|
+
@@exit_status=-1
|
136
153
|
end
|
137
154
|
|
138
155
|
def worker_initial_config
|
@@ -81,8 +81,8 @@ class SeqtrimWorker < ScbiMapreduce::Worker
|
|
81
81
|
def starting_worker
|
82
82
|
|
83
83
|
# $WORKER_LOG.level = Logger::ERROR
|
84
|
-
|
85
|
-
|
84
|
+
#$WORKER_LOG.level = Logger::WARN
|
85
|
+
$WORKER_LOG.level = Logger::INFO
|
86
86
|
$WORKER_LOG.info "Loading actions"
|
87
87
|
|
88
88
|
@action_manager = ActionManager.new
|
@@ -33,8 +33,18 @@ class Params
|
|
33
33
|
if !line.empty?
|
34
34
|
if !(line =~ /^\s*#/) # if line is not a comment
|
35
35
|
# extract the parameter's name in params[0] and the parameter's value in params[1]
|
36
|
-
params = line.split(/\s*=\s*/)
|
36
|
+
#params = line.split(/\s*=\s*/)
|
37
37
|
|
38
|
+
# store in the hash the pair key/value, in our case will be name/numeric-value ,
|
39
|
+
# that are save in params[0] and params[1], respectively
|
40
|
+
#if (!params[0].nil?) && (!params[1].nil?)
|
41
|
+
# set_param(params[0].strip,params[1].strip,comments)
|
42
|
+
# comments=[]
|
43
|
+
#end
|
44
|
+
|
45
|
+
line =~ /^\s*([^=]*)\s*=\s*(.*)\s*$/
|
46
|
+
params=[$1,$2]
|
47
|
+
|
38
48
|
# store in the hash the pair key/value, in our case will be name/numeric-value ,
|
39
49
|
# that are save in params[0] and params[1], respectively
|
40
50
|
if (!params[0].nil?) && (!params[1].nil?)
|
@@ -42,7 +52,9 @@ class Params
|
|
42
52
|
comments=[]
|
43
53
|
end
|
44
54
|
|
45
|
-
|
55
|
+
|
56
|
+
$LOG.debug "read: #{params[0]}=#{params[1]}" if !$LOG.nil?
|
57
|
+
|
46
58
|
else
|
47
59
|
comments << line.gsub(/^\s*#/,'')
|
48
60
|
end # end if comentario
|
@@ -15,6 +15,12 @@ require 'action_manager'
|
|
15
15
|
|
16
16
|
class Seqtrim
|
17
17
|
|
18
|
+
|
19
|
+
|
20
|
+
def self.exit_status
|
21
|
+
return SeqtrimWorkManager.exit_status
|
22
|
+
end
|
23
|
+
|
18
24
|
# First of all, reads the file's parameters, where are the values of all parameters and the 'plugin_list' that specifies the order of execution from the plugins.
|
19
25
|
#
|
20
26
|
# Secondly, loads the plugins in a folder .
|
@@ -24,7 +30,6 @@ class Seqtrim
|
|
24
30
|
# After that, creates a thread's pool of a determinate number of workers, e.g. 10 threads,
|
25
31
|
# reads the sequences from files 'fasta' , until now without qualities,
|
26
32
|
# and executes the plugins over the sequences in the pool of threads
|
27
|
-
|
28
33
|
|
29
34
|
def get_custom_cdhit(cd_hit_input_file,params)
|
30
35
|
cmd=''
|
@@ -136,7 +141,6 @@ class Seqtrim
|
|
136
141
|
default_value=Seqtrimnext::SEQTRIM_VERSION
|
137
142
|
params.check_param(errors,'seqtrim_version','String',default_value,comment)
|
138
143
|
|
139
|
-
|
140
144
|
if !errors.empty?
|
141
145
|
$LOG.error 'Please, define the following global parameters in params file:'
|
142
146
|
errors.each do |error|
|
@@ -166,7 +170,7 @@ class Seqtrim
|
|
166
170
|
if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
|
167
171
|
if !options[:use_checkpoint]
|
168
172
|
STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
|
169
|
-
exit
|
173
|
+
exit(-1)
|
170
174
|
end
|
171
175
|
end
|
172
176
|
|
@@ -230,7 +234,7 @@ class Seqtrim
|
|
230
234
|
|
231
235
|
$LOG.info "Checking global params"
|
232
236
|
if !check_global_params(params)
|
233
|
-
exit
|
237
|
+
exit(-1)
|
234
238
|
end
|
235
239
|
|
236
240
|
# Load actions
|
@@ -253,8 +257,7 @@ class Seqtrim
|
|
253
257
|
|
254
258
|
# save used params to file
|
255
259
|
params.save_file('used_params.txt')
|
256
|
-
|
257
|
-
exit
|
260
|
+
exit(-1)
|
258
261
|
end
|
259
262
|
|
260
263
|
if !Dir.exists?(OUTPUT_PATH)
|
@@ -297,7 +300,7 @@ class Seqtrim
|
|
297
300
|
params.load_repeated_seqs('clusters.fasta.clstr')
|
298
301
|
else
|
299
302
|
$LOG.error("Exiting due to not found clusters.fasta.clstr. Maybe cd-hit failed. Check cd-hit.out")
|
300
|
-
exit
|
303
|
+
exit(-1)
|
301
304
|
end
|
302
305
|
end
|
303
306
|
|
@@ -367,7 +370,12 @@ class Seqtrim
|
|
367
370
|
sequence_readers.each do |file|
|
368
371
|
file.close
|
369
372
|
end
|
370
|
-
|
373
|
+
|
374
|
+
if SeqtrimWorkManager.exit_status>=0
|
375
|
+
$LOG.info "Exit status: #{SeqtrimWorkManager.exit_status}"
|
376
|
+
else
|
377
|
+
$LOG.error "Exit status: #{SeqtrimWorkManager.exit_status}"
|
378
|
+
end
|
371
379
|
$LOG.info 'Closing server'
|
372
380
|
end
|
373
381
|
|
@@ -23,12 +23,15 @@ class Plugin
|
|
23
23
|
t1=Time.now
|
24
24
|
execute(seq)
|
25
25
|
t2=Time.now
|
26
|
+
|
27
|
+
add_plugin_stats('execution_time','total_seconds',t2-t1)
|
26
28
|
end
|
27
29
|
|
28
|
-
|
29
|
-
@stats['execution_time']={}
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
+
def add_plugin_stats(cat,item,elapsed_time)
|
33
|
+
@stats[cat]={} if @stats[cat].nil?
|
34
|
+
@stats[cat][item]=elapsed_time
|
32
35
|
end
|
33
36
|
|
34
37
|
def can_execute?
|
@@ -40,7 +43,9 @@ class Plugin
|
|
40
43
|
|
41
44
|
#Begins the plugin's execution whit the sequence "seq"
|
42
45
|
def execute(seqs)
|
46
|
+
t1=Time.now
|
43
47
|
blasts=do_blasts(seqs)
|
48
|
+
|
44
49
|
|
45
50
|
if !blasts.empty?
|
46
51
|
|
@@ -49,18 +54,24 @@ class Plugin
|
|
49
54
|
else
|
50
55
|
queries = blasts.querys
|
51
56
|
end
|
52
|
-
|
57
|
+
|
58
|
+
add_plugin_stats('execution_time','blast_and_parse',Time.now-t1)
|
59
|
+
|
60
|
+
t1=Time.now
|
53
61
|
seqs.each_with_index do |s,i|
|
54
62
|
exec_seq(s,queries[i])
|
55
63
|
end
|
56
64
|
|
57
65
|
else # there is no blast
|
58
66
|
|
67
|
+
t1=Time.now
|
59
68
|
seqs.each do |s|
|
60
69
|
exec_seq(s,nil)
|
61
70
|
end
|
62
|
-
|
63
71
|
end
|
72
|
+
|
73
|
+
add_plugin_stats('execution_time','exec_seq',Time.now-t1)
|
74
|
+
|
64
75
|
end
|
65
76
|
|
66
77
|
def do_blasts(seqs)
|
@@ -18,6 +18,9 @@ class PluginAbAdapters < Plugin
|
|
18
18
|
|
19
19
|
# find MIDS with less results than max_target_seqs value
|
20
20
|
blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
|
21
|
+
|
22
|
+
# con culling limit hay situaciones en las que un hit largo con 1 mismatch es ignorado porque hay otro más corto que no tiene ningun error, no es aceptable.
|
23
|
+
#blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE} -culling_limit=1")
|
21
24
|
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
22
25
|
|
23
26
|
fastas=[]
|
@@ -29,7 +32,32 @@ class PluginAbAdapters < Plugin
|
|
29
32
|
|
30
33
|
# fastas=fastas.join("\n")
|
31
34
|
|
32
|
-
blast_table_results = blast.do_blast(fastas)
|
35
|
+
#blast_table_results = blast.do_blast(fastas)
|
36
|
+
#blast_table_results = BlastTableResult.new(blast_table_results)
|
37
|
+
|
38
|
+
|
39
|
+
t1=Time.now
|
40
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
41
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
42
|
+
|
43
|
+
|
44
|
+
#f=File.new("/tmp/salida_#{fastas.first.gsub('>','').gsub('/','_')}.blast",'w+')
|
45
|
+
#f.puts blast.get_blast_cmd
|
46
|
+
#f.puts blast_table_results
|
47
|
+
#f.close
|
48
|
+
|
49
|
+
t1=Time.now
|
50
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
51
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
52
|
+
|
53
|
+
|
54
|
+
# t1=Time.now
|
55
|
+
# blast_table_results = blast.do_blast(fastas,:xml,false)
|
56
|
+
# add_plugin_stats('execution_time','blast',Time.now-t1)
|
57
|
+
|
58
|
+
# t1=Time.now
|
59
|
+
# blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
60
|
+
# add_plugin_stats('execution_time','parse',Time.now-t1)
|
33
61
|
|
34
62
|
# puts blast_table_results.inspect
|
35
63
|
|
@@ -23,12 +23,22 @@ class PluginContaminants < Plugin
|
|
23
23
|
# find MIDS with less results than max_target_seqs value
|
24
24
|
# blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
25
25
|
|
26
|
+
# This message is due to short sequences
|
27
|
+
#Warning: Could not calculate ungapped Karlin-Altschul parameters due to an invalid query sequence or its translation. Please verify the query sequence(s) and/or filtering options
|
28
|
+
|
26
29
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
27
30
|
# y una secuencia de baja complejidad como entrada
|
28
31
|
|
29
|
-
|
32
|
+
task_template=@params.get_param('blast_task_template_contaminants')
|
33
|
+
extra_params=@params.get_param('blast_extra_params_contaminants')
|
34
|
+
|
35
|
+
extra_params=extra_params.gsub(/^\"|\"?$/, '')
|
36
|
+
|
37
|
+
#blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
38
|
+
|
39
|
+
blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
30
40
|
|
31
|
-
$LOG.debug('BLAST:'+blast.get_blast_cmd(:
|
41
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
|
32
42
|
|
33
43
|
fastas=[]
|
34
44
|
|
@@ -43,7 +53,16 @@ class PluginContaminants < Plugin
|
|
43
53
|
# $LOG.info(fastas)
|
44
54
|
# $LOG.info('-'*20)
|
45
55
|
|
46
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
56
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
57
|
+
t1=Time.now
|
58
|
+
#blast_table_results = blast.do_blast(fastas,:xml,false)
|
59
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
60
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
61
|
+
|
62
|
+
t1=Time.now
|
63
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
64
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
65
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
47
66
|
|
48
67
|
# $LOG.info(blast_table_results.inspect)
|
49
68
|
|
@@ -62,12 +81,14 @@ class PluginContaminants < Plugin
|
|
62
81
|
return
|
63
82
|
end
|
64
83
|
|
84
|
+
#if blast_query.query_def != seq.seq_name
|
65
85
|
if blast_query.query_id != seq.seq_name
|
66
|
-
|
86
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
67
87
|
end
|
68
88
|
|
69
89
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
|
70
90
|
|
91
|
+
#add_plugin_stats('hsp_count',seq.seq_name,blast_query.hits.count)
|
71
92
|
|
72
93
|
#blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
|
73
94
|
# blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
@@ -195,11 +216,19 @@ class PluginContaminants < Plugin
|
|
195
216
|
default_value = 'true'
|
196
217
|
params.check_param(errors,'contaminants_reject','String',default_value,comment)
|
197
218
|
|
198
|
-
|
199
219
|
comment='Path for contaminants database'
|
200
220
|
default_value = File.join($FORMATTED_DB_PATH,'contaminants.fasta')
|
201
221
|
params.check_param(errors,'contaminants_db','DB',default_value,comment)
|
202
222
|
|
223
|
+
comment='Blast task template for contaminations'
|
224
|
+
#default_value = 'blastn'
|
225
|
+
default_value = 'megablast'
|
226
|
+
params.check_param(errors,'blast_task_template_contaminants','String',default_value,comment)
|
227
|
+
|
228
|
+
comment='Blast extra params for contaminations'
|
229
|
+
#default_value = ''
|
230
|
+
default_value = '"-word_size=22"'
|
231
|
+
params.check_param(errors,'blast_extra_params_contaminants','String',default_value,comment)
|
203
232
|
|
204
233
|
return errors
|
205
234
|
end
|
@@ -73,6 +73,7 @@ class PluginLowComplexity < Plugin
|
|
73
73
|
|
74
74
|
if !actions.empty?
|
75
75
|
add_stats('low_complexity',total_dust)
|
76
|
+
seq.add_file_tag(0, 'low_complexity', :both, 100)
|
76
77
|
seq.add_actions(actions)
|
77
78
|
end
|
78
79
|
|
@@ -92,6 +93,7 @@ class PluginLowComplexity < Plugin
|
|
92
93
|
# default_value = 80
|
93
94
|
# params.check_param(errors,'poly_t_percent','Integer',default_value,comment)
|
94
95
|
#
|
96
|
+
|
95
97
|
return errors
|
96
98
|
end
|
97
99
|
|
@@ -170,9 +170,10 @@ class PluginLowQuality < Plugin
|
|
170
170
|
default_value = 20
|
171
171
|
params.check_param(errors,'min_quality','Integer',default_value,comment)
|
172
172
|
|
173
|
-
|
174
|
-
|
175
|
-
|
173
|
+
|
174
|
+
#comment='Quality window for scanning low quality segments'
|
175
|
+
#default_value = 15
|
176
|
+
#params.check_param(errors,'window_width','Integer',default_value,comment)
|
176
177
|
|
177
178
|
|
178
179
|
comment='Minimum length of a bad quality segment inside the sequence'
|
@@ -43,9 +43,14 @@ class PluginUserContaminants < Plugin
|
|
43
43
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
44
44
|
# y una secuencia de baja complejidad como entrada
|
45
45
|
|
46
|
-
|
46
|
+
task_template=@params.get_param('blast_task_template_user_contaminants')
|
47
|
+
extra_params=@params.get_param('blast_extra_params_user_contaminants')
|
47
48
|
|
48
|
-
|
49
|
+
extra_params=extra_params.gsub(/^\"|\"?$/, '')
|
50
|
+
|
51
|
+
blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
|
52
|
+
|
53
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
|
49
54
|
|
50
55
|
fastas=[]
|
51
56
|
|
@@ -55,7 +60,16 @@ class PluginUserContaminants < Plugin
|
|
55
60
|
end
|
56
61
|
|
57
62
|
|
58
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
63
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
64
|
+
t1=Time.now
|
65
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
66
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
67
|
+
|
68
|
+
t1=Time.now
|
69
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
70
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
71
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
72
|
+
|
59
73
|
|
60
74
|
return blast_table_results
|
61
75
|
end
|
@@ -63,7 +77,7 @@ class PluginUserContaminants < Plugin
|
|
63
77
|
|
64
78
|
def exec_seq(seq,blast_query)
|
65
79
|
if blast_query.query_id != seq.seq_name
|
66
|
-
|
80
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
67
81
|
end
|
68
82
|
|
69
83
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"
|
@@ -144,6 +158,16 @@ class PluginUserContaminants < Plugin
|
|
144
158
|
default_value = "" #File.join($FORMATTED_DB_PATH,'user_contaminant.fasta')
|
145
159
|
params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
|
146
160
|
|
161
|
+
comment='Blast task template for user contaminations'
|
162
|
+
#default_value = 'blastn'
|
163
|
+
default_value = 'megablast'
|
164
|
+
params.check_param(errors,'blast_task_template_user_contaminants','String',default_value,comment)
|
165
|
+
|
166
|
+
comment='Blast extra params for user contaminations'
|
167
|
+
#default_value = ''
|
168
|
+
default_value = '"-word_size=22"'
|
169
|
+
params.check_param(errors,'blast_extra_params_user_contaminants','String',default_value,comment)
|
170
|
+
|
147
171
|
return errors
|
148
172
|
end
|
149
173
|
|
@@ -40,7 +40,17 @@ class PluginVectors < Plugin
|
|
40
40
|
|
41
41
|
# fastas=fastas.join("\n")
|
42
42
|
|
43
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
43
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
44
|
+
|
45
|
+
t1=Time.now
|
46
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
47
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
48
|
+
|
49
|
+
t1=Time.now
|
50
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
51
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
52
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
53
|
+
|
44
54
|
|
45
55
|
# puts blast_table_results.inspect
|
46
56
|
|
@@ -50,7 +60,7 @@ class PluginVectors < Plugin
|
|
50
60
|
|
51
61
|
def exec_seq(seq,blast_query)
|
52
62
|
if blast_query.query_id != seq.seq_name
|
53
|
-
|
63
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
54
64
|
end
|
55
65
|
|
56
66
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
|
data/lib/seqtrimnext/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.62
|
4
|
+
version: 2.0.66
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dario Guerrero
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-05-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -219,6 +219,7 @@ executables:
|
|
219
219
|
- resume_clusters.rb
|
220
220
|
- resume_execution_times.rb
|
221
221
|
- resume_rejected.rb
|
222
|
+
- resume_stn_contaminants.rb
|
222
223
|
- resume_stn_stats.rb
|
223
224
|
- reverse_paired.rb
|
224
225
|
- seqtrimnext
|
@@ -252,6 +253,7 @@ files:
|
|
252
253
|
- bin/resume_clusters.rb
|
253
254
|
- bin/resume_execution_times.rb
|
254
255
|
- bin/resume_rejected.rb
|
256
|
+
- bin/resume_stn_contaminants.rb
|
255
257
|
- bin/resume_stn_stats.rb
|
256
258
|
- bin/reverse_paired.rb
|
257
259
|
- bin/seqtrimnext
|
@@ -380,7 +382,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
380
382
|
version: '0'
|
381
383
|
requirements: []
|
382
384
|
rubyforge_project:
|
383
|
-
rubygems_version: 2.4.
|
385
|
+
rubygems_version: 2.4.8
|
384
386
|
signing_key:
|
385
387
|
specification_version: 4
|
386
388
|
summary: Sequences preprocessing and cleaning software
|