seqtrimnext 2.0.62 → 2.0.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/resume_execution_times.rb +15 -6
- data/bin/resume_stn_contaminants.rb +37 -0
- data/bin/resume_stn_stats.rb +2 -1
- data/bin/seqtrimnext +8 -6
- data/bin/split_fastq.rb +1 -1
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +17 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +2 -2
- data/lib/seqtrimnext/classes/params.rb +14 -2
- data/lib/seqtrimnext/classes/seqtrim.rb +16 -8
- data/lib/seqtrimnext/plugins/plugin.rb +16 -5
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +29 -1
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +34 -5
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +2 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +4 -3
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +28 -4
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +12 -2
- data/lib/seqtrimnext/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab74e9c551c43eb30da4a881ab544c9d7754ecaa
|
4
|
+
data.tar.gz: acdf2c4e6d4e5d36d5fafbe1e2004178ed33b145
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d60d9cb31be6eab17ff47e81e77ae3246408dee62f4061e040a9efdcfc9e6f54997e873e244a337091be391a00ca05eec2cd8d5da7b6ecdebf468b76540b5a39
|
7
|
+
data.tar.gz: bf2c6174ca48e5ea0d1bf9b7da3e58b01d89016674fa7050473a6dca2dc105d6156281c080e25d5906384a4587b7a51571b51ed9155043e0dee71546fff5ecb7
|
data/bin/resume_execution_times.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
if ARGV.count<1
|
6
|
-
puts "Usage: #{$0} [-t] [-j] stats1.json"
|
6
|
+
puts "Usage: #{$0} [-t] [-j] [-h] stats1.json"
|
7
7
|
exit -1
|
8
8
|
end
|
9
9
|
|
@@ -20,6 +20,15 @@ if ARGV[0]=='-j'
|
|
20
20
|
ARGV.shift
|
21
21
|
end
|
22
22
|
|
23
|
+
time_divider=1
|
24
|
+
# print header
|
25
|
+
if ARGV[0]=='-h'
|
26
|
+
time_divider=3600
|
27
|
+
puts "Times are in hours"
|
28
|
+
ARGV.shift
|
29
|
+
end
|
30
|
+
|
31
|
+
|
23
32
|
|
24
33
|
ARGV.each do |file_path|
|
25
34
|
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
@@ -34,7 +43,7 @@ ARGV.each do |file_path|
|
|
34
43
|
begin
|
35
44
|
stats.keys.each do |k|
|
36
45
|
if stats[k]['execution_time']
|
37
|
-
res[k]=stats[k]['execution_time']['total_seconds']
|
46
|
+
res[k]=stats[k]['execution_time']['total_seconds'].to_f/time_divider
|
38
47
|
total+=res[k]
|
39
48
|
end
|
40
49
|
end
|
@@ -48,10 +57,10 @@ ARGV.each do |file_path|
|
|
48
57
|
|
49
58
|
if stats['scbi_mapreduce']
|
50
59
|
res['TOTAL_workers']=stats['scbi_mapreduce']['connected_workers']
|
51
|
-
res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']
|
52
|
-
res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']
|
53
|
-
res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']
|
54
|
-
res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']
|
60
|
+
res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']/time_divider
|
61
|
+
res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']/time_divider
|
62
|
+
res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']/time_divider
|
63
|
+
res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']/time_divider
|
55
64
|
end
|
56
65
|
|
57
66
|
if puts_json
|
data/bin/resume_stn_contaminants.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
if ARGV.count<1
|
6
|
+
puts "Usage: #{$0} stats1.json [stats2.json stats3.json,...]"
|
7
|
+
exit -1
|
8
|
+
end
|
9
|
+
|
10
|
+
# print header
|
11
|
+
if ARGV[0]=='-t'
|
12
|
+
#heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
|
13
|
+
#puts heads.join("\t")
|
14
|
+
ARGV.shift
|
15
|
+
end
|
16
|
+
|
17
|
+
contaminants={}
|
18
|
+
|
19
|
+
ARGV.each do |file_path|
|
20
|
+
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
21
|
+
|
22
|
+
stats=JSON::parse(File.read(file_path))
|
23
|
+
|
24
|
+
res=[]
|
25
|
+
cont=stats['PluginContaminants']['contaminants_ids']
|
26
|
+
|
27
|
+
limit=60
|
28
|
+
cont.keys.sort{|c1,c2| cont[c2].to_i <=> cont[c1].to_i}.each do |k|
|
29
|
+
#puts "#{k} => #{cont[k]}"
|
30
|
+
contaminants[k]=(contaminants[k] || 0 ) + cont[k]
|
31
|
+
limit = limit -1
|
32
|
+
break if limit==0
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
puts JSON::pretty_generate(contaminants)
|
data/bin/resume_stn_stats.rb
CHANGED
@@ -10,10 +10,11 @@ end
|
|
10
10
|
# print header
|
11
11
|
if ARGV[0]=='-t'
|
12
12
|
heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
|
13
|
-
puts heads.join("\t")
|
13
|
+
puts heads.join("\t")
|
14
14
|
ARGV.shift
|
15
15
|
end
|
16
16
|
|
17
|
+
|
17
18
|
ARGV.each do |file_path|
|
18
19
|
sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
|
19
20
|
|
data/bin/seqtrimnext
CHANGED
@@ -216,7 +216,7 @@ optparse = OptionParser.new do |opts|
|
|
216
216
|
options[:workers] = Integer(workers)
|
217
217
|
rescue
|
218
218
|
STDERR.puts "ERROR:Invalid workers parameter #{options[:workers]}"
|
219
|
-
exit
|
219
|
+
exit -1
|
220
220
|
end
|
221
221
|
|
222
222
|
end
|
@@ -268,7 +268,7 @@ optparse = OptionParser.new do |opts|
|
|
268
268
|
options[:format] = value
|
269
269
|
if !['sanger','illumina15', 'illumina18'].include?(value)
|
270
270
|
STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
|
271
|
-
exit
|
271
|
+
exit -1
|
272
272
|
end
|
273
273
|
end
|
274
274
|
|
@@ -301,7 +301,7 @@ optparse = OptionParser.new do |opts|
|
|
301
301
|
options[:template] = file
|
302
302
|
end
|
303
303
|
|
304
|
-
options[:chunk_size] =
|
304
|
+
options[:chunk_size] = 5000
|
305
305
|
opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
|
306
306
|
options[:chunk_size] = cs.to_i
|
307
307
|
end
|
@@ -332,7 +332,7 @@ optparse = OptionParser.new do |opts|
|
|
332
332
|
opts.on_tail( '-h', '--help', 'Display this screen' ) do
|
333
333
|
puts opts
|
334
334
|
show_additional_help
|
335
|
-
exit
|
335
|
+
exit -1
|
336
336
|
end
|
337
337
|
end
|
338
338
|
|
@@ -342,13 +342,13 @@ optparse.parse!
|
|
342
342
|
if options[:list_db] then
|
343
343
|
# List database entries in a database
|
344
344
|
ListDb.new($DB_PATH,options[:list_db_name])
|
345
|
-
exit
|
345
|
+
exit -1
|
346
346
|
end
|
347
347
|
|
348
348
|
if options[:gen_params] then
|
349
349
|
# Generates a sample params file in current directory
|
350
350
|
Params.generate_sample_params
|
351
|
-
exit
|
351
|
+
exit -1
|
352
352
|
end
|
353
353
|
|
354
354
|
#set logger
|
@@ -453,3 +453,5 @@ else
|
|
453
453
|
|
454
454
|
$LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
|
455
455
|
end
|
456
|
+
|
457
|
+
exit(Seqtrim.exit_status)
|
data/bin/split_fastq.rb
CHANGED
data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb
CHANGED
@@ -17,6 +17,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
17
17
|
@@full_stats={}
|
18
18
|
@@params= params
|
19
19
|
@@exit = false
|
20
|
+
@@exit_status=0
|
20
21
|
|
21
22
|
@@ongoing_stats={}
|
22
23
|
@@ongoing_stats[:sequence_count] = 0
|
@@ -85,6 +86,10 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
85
86
|
|
86
87
|
end
|
87
88
|
|
89
|
+
def self.exit_status
|
90
|
+
return @@exit_status
|
91
|
+
end
|
92
|
+
|
88
93
|
def self.end_work_manager
|
89
94
|
|
90
95
|
# if initial files doesn't exists, create it
|
@@ -115,6 +120,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
115
120
|
|
116
121
|
end
|
117
122
|
|
123
|
+
def self.global_error_received(error_exception)
|
124
|
+
$LOG.error "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
|
125
|
+
@@errors_file.puts "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
|
126
|
+
@@errors_file.puts "="*60
|
127
|
+
@@exit_status=-1
|
128
|
+
SeqtrimWorkManager.controlled_exit
|
129
|
+
end
|
130
|
+
|
118
131
|
def self.work_manager_finished
|
119
132
|
@@full_stats['scbi_mapreduce']=@@stats
|
120
133
|
|
@@ -129,10 +142,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
129
142
|
def error_received(worker_error, obj)
|
130
143
|
@@errors_file.puts "Error while processing object #{obj.inspect}\n" + worker_error.original_exception.message + ":\n" +worker_error.original_exception.backtrace.join("\n")
|
131
144
|
@@errors_file.puts "="*60
|
145
|
+
@@exit_status=-1
|
146
|
+
SeqtrimWorkManager.controlled_exit
|
147
|
+
|
132
148
|
end
|
133
149
|
|
134
150
|
def too_many_errors_received
|
135
151
|
$LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
|
152
|
+
@@exit_status=-1
|
136
153
|
end
|
137
154
|
|
138
155
|
def worker_initial_config
|
data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb
CHANGED
@@ -81,8 +81,8 @@ class SeqtrimWorker < ScbiMapreduce::Worker
|
|
81
81
|
def starting_worker
|
82
82
|
|
83
83
|
# $WORKER_LOG.level = Logger::ERROR
|
84
|
-
|
85
|
-
|
84
|
+
#$WORKER_LOG.level = Logger::WARN
|
85
|
+
$WORKER_LOG.level = Logger::INFO
|
86
86
|
$WORKER_LOG.info "Loading actions"
|
87
87
|
|
88
88
|
@action_manager = ActionManager.new
|
data/lib/seqtrimnext/classes/params.rb
CHANGED
@@ -33,8 +33,18 @@ class Params
|
|
33
33
|
if !line.empty?
|
34
34
|
if !(line =~ /^\s*#/) # if line is not a comment
|
35
35
|
# extract the parameter's name in params[0] and the parameter's value in params[1]
|
36
|
-
params = line.split(/\s*=\s*/)
|
36
|
+
#params = line.split(/\s*=\s*/)
|
37
37
|
|
38
|
+
# store in the hash the pair key/value, in our case will be name/numeric-value ,
|
39
|
+
# that are save in params[0] and params[1], respectively
|
40
|
+
#if (!params[0].nil?) && (!params[1].nil?)
|
41
|
+
# set_param(params[0].strip,params[1].strip,comments)
|
42
|
+
# comments=[]
|
43
|
+
#end
|
44
|
+
|
45
|
+
line =~ /^\s*([^=]*)\s*=\s*(.*)\s*$/
|
46
|
+
params=[$1,$2]
|
47
|
+
|
38
48
|
# store in the hash the pair key/value, in our case will be name/numeric-value ,
|
39
49
|
# that are save in params[0] and params[1], respectively
|
40
50
|
if (!params[0].nil?) && (!params[1].nil?)
|
@@ -42,7 +52,9 @@ class Params
|
|
42
52
|
comments=[]
|
43
53
|
end
|
44
54
|
|
45
|
-
|
55
|
+
|
56
|
+
$LOG.debug "read: #{params[0]}=#{params[1]}" if !$LOG.nil?
|
57
|
+
|
46
58
|
else
|
47
59
|
comments << line.gsub(/^\s*#/,'')
|
48
60
|
end # end if comentario
|
data/lib/seqtrimnext/classes/seqtrim.rb
CHANGED
@@ -15,6 +15,12 @@ require 'action_manager'
|
|
15
15
|
|
16
16
|
class Seqtrim
|
17
17
|
|
18
|
+
|
19
|
+
|
20
|
+
def self.exit_status
|
21
|
+
return SeqtrimWorkManager.exit_status
|
22
|
+
end
|
23
|
+
|
18
24
|
# First of all, reads the file's parameters, where are the values of all parameters and the 'plugin_list' that specifies the order of execution from the plugins.
|
19
25
|
#
|
20
26
|
# Secondly, loads the plugins in a folder .
|
@@ -24,7 +30,6 @@ class Seqtrim
|
|
24
30
|
# After that, creates a thread's pool of a determinate number of workers, e.g. 10 threads,
|
25
31
|
# reads the sequences from files 'fasta' , until now without qualities,
|
26
32
|
# and executes the plugins over the sequences in the pool of threads
|
27
|
-
|
28
33
|
|
29
34
|
def get_custom_cdhit(cd_hit_input_file,params)
|
30
35
|
cmd=''
|
@@ -136,7 +141,6 @@ class Seqtrim
|
|
136
141
|
default_value=Seqtrimnext::SEQTRIM_VERSION
|
137
142
|
params.check_param(errors,'seqtrim_version','String',default_value,comment)
|
138
143
|
|
139
|
-
|
140
144
|
if !errors.empty?
|
141
145
|
$LOG.error 'Please, define the following global parameters in params file:'
|
142
146
|
errors.each do |error|
|
@@ -166,7 +170,7 @@ class Seqtrim
|
|
166
170
|
if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
|
167
171
|
if !options[:use_checkpoint]
|
168
172
|
STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
|
169
|
-
exit
|
173
|
+
exit(-1)
|
170
174
|
end
|
171
175
|
end
|
172
176
|
|
@@ -230,7 +234,7 @@ class Seqtrim
|
|
230
234
|
|
231
235
|
$LOG.info "Checking global params"
|
232
236
|
if !check_global_params(params)
|
233
|
-
exit
|
237
|
+
exit(-1)
|
234
238
|
end
|
235
239
|
|
236
240
|
# Load actions
|
@@ -253,8 +257,7 @@ class Seqtrim
|
|
253
257
|
|
254
258
|
# save used params to file
|
255
259
|
params.save_file('used_params.txt')
|
256
|
-
|
257
|
-
exit
|
260
|
+
exit(-1)
|
258
261
|
end
|
259
262
|
|
260
263
|
if !Dir.exists?(OUTPUT_PATH)
|
@@ -297,7 +300,7 @@ class Seqtrim
|
|
297
300
|
params.load_repeated_seqs('clusters.fasta.clstr')
|
298
301
|
else
|
299
302
|
$LOG.error("Exiting due to not found clusters.fasta.clstr. Maybe cd-hit failed. Check cd-hit.out")
|
300
|
-
exit
|
303
|
+
exit(-1)
|
301
304
|
end
|
302
305
|
end
|
303
306
|
|
@@ -367,7 +370,12 @@ class Seqtrim
|
|
367
370
|
sequence_readers.each do |file|
|
368
371
|
file.close
|
369
372
|
end
|
370
|
-
|
373
|
+
|
374
|
+
if SeqtrimWorkManager.exit_status>=0
|
375
|
+
$LOG.info "Exit status: #{SeqtrimWorkManager.exit_status}"
|
376
|
+
else
|
377
|
+
$LOG.error "Exit status: #{SeqtrimWorkManager.exit_status}"
|
378
|
+
end
|
371
379
|
$LOG.info 'Closing server'
|
372
380
|
end
|
373
381
|
|
data/lib/seqtrimnext/plugins/plugin.rb
CHANGED
@@ -23,12 +23,15 @@ class Plugin
|
|
23
23
|
t1=Time.now
|
24
24
|
execute(seq)
|
25
25
|
t2=Time.now
|
26
|
+
|
27
|
+
add_plugin_stats('execution_time','total_seconds',t2-t1)
|
26
28
|
end
|
27
29
|
|
28
|
-
|
29
|
-
@stats['execution_time']={}
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
+
def add_plugin_stats(cat,item,elapsed_time)
|
33
|
+
@stats[cat]={} if @stats[cat].nil?
|
34
|
+
@stats[cat][item]=elapsed_time
|
32
35
|
end
|
33
36
|
|
34
37
|
def can_execute?
|
@@ -40,7 +43,9 @@ class Plugin
|
|
40
43
|
|
41
44
|
#Begins the plugin's execution whit the sequence "seq"
|
42
45
|
def execute(seqs)
|
46
|
+
t1=Time.now
|
43
47
|
blasts=do_blasts(seqs)
|
48
|
+
|
44
49
|
|
45
50
|
if !blasts.empty?
|
46
51
|
|
@@ -49,18 +54,24 @@ class Plugin
|
|
49
54
|
else
|
50
55
|
queries = blasts.querys
|
51
56
|
end
|
52
|
-
|
57
|
+
|
58
|
+
add_plugin_stats('execution_time','blast_and_parse',Time.now-t1)
|
59
|
+
|
60
|
+
t1=Time.now
|
53
61
|
seqs.each_with_index do |s,i|
|
54
62
|
exec_seq(s,queries[i])
|
55
63
|
end
|
56
64
|
|
57
65
|
else # there is no blast
|
58
66
|
|
67
|
+
t1=Time.now
|
59
68
|
seqs.each do |s|
|
60
69
|
exec_seq(s,nil)
|
61
70
|
end
|
62
|
-
|
63
71
|
end
|
72
|
+
|
73
|
+
add_plugin_stats('execution_time','exec_seq',Time.now-t1)
|
74
|
+
|
64
75
|
end
|
65
76
|
|
66
77
|
def do_blasts(seqs)
|
data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb
CHANGED
@@ -18,6 +18,9 @@ class PluginAbAdapters < Plugin
|
|
18
18
|
|
19
19
|
# find MIDS with less results than max_target_seqs value
|
20
20
|
blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
|
21
|
+
|
22
|
+
# con culling limit hay situaciones en las que un hit largo con 1 mismatch es ignorado porque hay otro más corto que no tiene ningun error, no es aceptable.
|
23
|
+
#blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE} -culling_limit=1")
|
21
24
|
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
22
25
|
|
23
26
|
fastas=[]
|
@@ -29,7 +32,32 @@ class PluginAbAdapters < Plugin
|
|
29
32
|
|
30
33
|
# fastas=fastas.join("\n")
|
31
34
|
|
32
|
-
blast_table_results = blast.do_blast(fastas)
|
35
|
+
#blast_table_results = blast.do_blast(fastas)
|
36
|
+
#blast_table_results = BlastTableResult.new(blast_table_results)
|
37
|
+
|
38
|
+
|
39
|
+
t1=Time.now
|
40
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
41
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
42
|
+
|
43
|
+
|
44
|
+
#f=File.new("/tmp/salida_#{fastas.first.gsub('>','').gsub('/','_')}.blast",'w+')
|
45
|
+
#f.puts blast.get_blast_cmd
|
46
|
+
#f.puts blast_table_results
|
47
|
+
#f.close
|
48
|
+
|
49
|
+
t1=Time.now
|
50
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
51
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
52
|
+
|
53
|
+
|
54
|
+
# t1=Time.now
|
55
|
+
# blast_table_results = blast.do_blast(fastas,:xml,false)
|
56
|
+
# add_plugin_stats('execution_time','blast',Time.now-t1)
|
57
|
+
|
58
|
+
# t1=Time.now
|
59
|
+
# blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
60
|
+
# add_plugin_stats('execution_time','parse',Time.now-t1)
|
33
61
|
|
34
62
|
# puts blast_table_results.inspect
|
35
63
|
|
data/lib/seqtrimnext/plugins/plugin_contaminants.rb
CHANGED
@@ -23,12 +23,22 @@ class PluginContaminants < Plugin
|
|
23
23
|
# find MIDS with less results than max_target_seqs value
|
24
24
|
# blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
25
25
|
|
26
|
+
# This message is due to short sequences
|
27
|
+
#Warning: Could not calculate ungapped Karlin-Altschul parameters due to an invalid query sequence or its translation. Please verify the query sequence(s) and/or filtering options
|
28
|
+
|
26
29
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
27
30
|
# y una secuencia de baja complejidad como entrada
|
28
31
|
|
29
|
-
|
32
|
+
task_template=@params.get_param('blast_task_template_contaminants')
|
33
|
+
extra_params=@params.get_param('blast_extra_params_contaminants')
|
34
|
+
|
35
|
+
extra_params=extra_params.gsub(/^\"|\"?$/, '')
|
36
|
+
|
37
|
+
#blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
38
|
+
|
39
|
+
blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
30
40
|
|
31
|
-
$LOG.debug('BLAST:'+blast.get_blast_cmd(:
|
41
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
|
32
42
|
|
33
43
|
fastas=[]
|
34
44
|
|
@@ -43,7 +53,16 @@ class PluginContaminants < Plugin
|
|
43
53
|
# $LOG.info(fastas)
|
44
54
|
# $LOG.info('-'*20)
|
45
55
|
|
46
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
56
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
57
|
+
t1=Time.now
|
58
|
+
#blast_table_results = blast.do_blast(fastas,:xml,false)
|
59
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
60
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
61
|
+
|
62
|
+
t1=Time.now
|
63
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
64
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
65
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
47
66
|
|
48
67
|
# $LOG.info(blast_table_results.inspect)
|
49
68
|
|
@@ -62,12 +81,14 @@ class PluginContaminants < Plugin
|
|
62
81
|
return
|
63
82
|
end
|
64
83
|
|
84
|
+
#if blast_query.query_def != seq.seq_name
|
65
85
|
if blast_query.query_id != seq.seq_name
|
66
|
-
|
86
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
67
87
|
end
|
68
88
|
|
69
89
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
|
70
90
|
|
91
|
+
#add_plugin_stats('hsp_count',seq.seq_name,blast_query.hits.count)
|
71
92
|
|
72
93
|
#blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
|
73
94
|
# blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
|
@@ -195,11 +216,19 @@ class PluginContaminants < Plugin
|
|
195
216
|
default_value = 'true'
|
196
217
|
params.check_param(errors,'contaminants_reject','String',default_value,comment)
|
197
218
|
|
198
|
-
|
199
219
|
comment='Path for contaminants database'
|
200
220
|
default_value = File.join($FORMATTED_DB_PATH,'contaminants.fasta')
|
201
221
|
params.check_param(errors,'contaminants_db','DB',default_value,comment)
|
202
222
|
|
223
|
+
comment='Blast task template for contaminations'
|
224
|
+
#default_value = 'blastn'
|
225
|
+
default_value = 'megablast'
|
226
|
+
params.check_param(errors,'blast_task_template_contaminants','String',default_value,comment)
|
227
|
+
|
228
|
+
comment='Blast extra params for contaminations'
|
229
|
+
#default_value = ''
|
230
|
+
default_value = '"-word_size=22"'
|
231
|
+
params.check_param(errors,'blast_extra_params_contaminants','String',default_value,comment)
|
203
232
|
|
204
233
|
return errors
|
205
234
|
end
|
data/lib/seqtrimnext/plugins/plugin_low_complexity.rb
CHANGED
@@ -73,6 +73,7 @@ class PluginLowComplexity < Plugin
|
|
73
73
|
|
74
74
|
if !actions.empty?
|
75
75
|
add_stats('low_complexity',total_dust)
|
76
|
+
seq.add_file_tag(0, 'low_complexity', :both, 100)
|
76
77
|
seq.add_actions(actions)
|
77
78
|
end
|
78
79
|
|
@@ -92,6 +93,7 @@ class PluginLowComplexity < Plugin
|
|
92
93
|
# default_value = 80
|
93
94
|
# params.check_param(errors,'poly_t_percent','Integer',default_value,comment)
|
94
95
|
#
|
96
|
+
|
95
97
|
return errors
|
96
98
|
end
|
97
99
|
|
data/lib/seqtrimnext/plugins/plugin_low_quality.rb
CHANGED
@@ -170,9 +170,10 @@ class PluginLowQuality < Plugin
|
|
170
170
|
default_value = 20
|
171
171
|
params.check_param(errors,'min_quality','Integer',default_value,comment)
|
172
172
|
|
173
|
-
|
174
|
-
|
175
|
-
|
173
|
+
|
174
|
+
#comment='Quality window for scanning low quality segments'
|
175
|
+
#default_value = 15
|
176
|
+
#params.check_param(errors,'window_width','Integer',default_value,comment)
|
176
177
|
|
177
178
|
|
178
179
|
comment='Minimum length of a bad quality segment inside the sequence'
|
data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb
CHANGED
@@ -43,9 +43,14 @@ class PluginUserContaminants < Plugin
|
|
43
43
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
44
44
|
# y una secuencia de baja complejidad como entrada
|
45
45
|
|
46
|
-
|
46
|
+
task_template=@params.get_param('blast_task_template_user_contaminants')
|
47
|
+
extra_params=@params.get_param('blast_extra_params_user_contaminants')
|
47
48
|
|
48
|
-
|
49
|
+
extra_params=extra_params.gsub(/^\"|\"?$/, '')
|
50
|
+
|
51
|
+
blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
|
52
|
+
|
53
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
|
49
54
|
|
50
55
|
fastas=[]
|
51
56
|
|
@@ -55,7 +60,16 @@ class PluginUserContaminants < Plugin
|
|
55
60
|
end
|
56
61
|
|
57
62
|
|
58
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
63
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
64
|
+
t1=Time.now
|
65
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
66
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
67
|
+
|
68
|
+
t1=Time.now
|
69
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
70
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
71
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
72
|
+
|
59
73
|
|
60
74
|
return blast_table_results
|
61
75
|
end
|
@@ -63,7 +77,7 @@ class PluginUserContaminants < Plugin
|
|
63
77
|
|
64
78
|
def exec_seq(seq,blast_query)
|
65
79
|
if blast_query.query_id != seq.seq_name
|
66
|
-
|
80
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
67
81
|
end
|
68
82
|
|
69
83
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"
|
@@ -144,6 +158,16 @@ class PluginUserContaminants < Plugin
|
|
144
158
|
default_value = "" #File.join($FORMATTED_DB_PATH,'user_contaminant.fasta')
|
145
159
|
params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
|
146
160
|
|
161
|
+
comment='Blast task template for user contaminations'
|
162
|
+
#default_value = 'blastn'
|
163
|
+
default_value = 'megablast'
|
164
|
+
params.check_param(errors,'blast_task_template_user_contaminants','String',default_value,comment)
|
165
|
+
|
166
|
+
comment='Blast extra params for user contaminations'
|
167
|
+
#default_value = ''
|
168
|
+
default_value = '"-word_size=22"'
|
169
|
+
params.check_param(errors,'blast_extra_params_user_contaminants','String',default_value,comment)
|
170
|
+
|
147
171
|
return errors
|
148
172
|
end
|
149
173
|
|
data/lib/seqtrimnext/plugins/plugin_vectors.rb
CHANGED
@@ -40,7 +40,17 @@ class PluginVectors < Plugin
|
|
40
40
|
|
41
41
|
# fastas=fastas.join("\n")
|
42
42
|
|
43
|
-
blast_table_results = blast.do_blast(fastas,:xml)
|
43
|
+
#blast_table_results = blast.do_blast(fastas,:xml)
|
44
|
+
|
45
|
+
t1=Time.now
|
46
|
+
blast_table_results = blast.do_blast(fastas,:table,false)
|
47
|
+
add_plugin_stats('execution_time','blast',Time.now-t1)
|
48
|
+
|
49
|
+
t1=Time.now
|
50
|
+
#blast_table_results = BlastStreamxmlResult.new(blast_table_results)
|
51
|
+
blast_table_results = BlastTableResult.new(blast_table_results)
|
52
|
+
add_plugin_stats('execution_time','parse',Time.now-t1)
|
53
|
+
|
44
54
|
|
45
55
|
# puts blast_table_results.inspect
|
46
56
|
|
@@ -50,7 +60,7 @@ class PluginVectors < Plugin
|
|
50
60
|
|
51
61
|
def exec_seq(seq,blast_query)
|
52
62
|
if blast_query.query_id != seq.seq_name
|
53
|
-
|
63
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
54
64
|
end
|
55
65
|
|
56
66
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
|
data/lib/seqtrimnext/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.62
|
4
|
+
version: 2.0.66
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dario Guerrero
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-05-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -219,6 +219,7 @@ executables:
|
|
219
219
|
- resume_clusters.rb
|
220
220
|
- resume_execution_times.rb
|
221
221
|
- resume_rejected.rb
|
222
|
+
- resume_stn_contaminants.rb
|
222
223
|
- resume_stn_stats.rb
|
223
224
|
- reverse_paired.rb
|
224
225
|
- seqtrimnext
|
@@ -252,6 +253,7 @@ files:
|
|
252
253
|
- bin/resume_clusters.rb
|
253
254
|
- bin/resume_execution_times.rb
|
254
255
|
- bin/resume_rejected.rb
|
256
|
+
- bin/resume_stn_contaminants.rb
|
255
257
|
- bin/resume_stn_stats.rb
|
256
258
|
- bin/reverse_paired.rb
|
257
259
|
- bin/seqtrimnext
|
@@ -380,7 +382,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
380
382
|
version: '0'
|
381
383
|
requirements: []
|
382
384
|
rubyforge_project:
|
383
|
-
rubygems_version: 2.4.
|
385
|
+
rubygems_version: 2.4.8
|
384
386
|
signing_key:
|
385
387
|
specification_version: 4
|
386
388
|
summary: Sequences preprocessing and cleaning software
|