seqtrimnext 2.0.62 → 2.0.66

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1fb6845331e9b46e2a53f29312fce204869a91cf
4
- data.tar.gz: 18d138fe1b899582daae2e4118cd6118853afc29
3
+ metadata.gz: ab74e9c551c43eb30da4a881ab544c9d7754ecaa
4
+ data.tar.gz: acdf2c4e6d4e5d36d5fafbe1e2004178ed33b145
5
5
  SHA512:
6
- metadata.gz: d22ad436ce0b15ff783d15b871491adaa5947f9c1da4a7247712f7d6e9e790978ad90ab22657dc7768b51a61d6122f874c7853e9d0279f0dd408702a681ca5f0
7
- data.tar.gz: 67d4af5e0bb8dc2eba127b6fe1ff04684c7b96db3fbad9e94cb6a222d88d4da3b5b5fd204022f70c0e14d2511ae476a7814f3ec32d21900c73f2370462207faa
6
+ metadata.gz: d60d9cb31be6eab17ff47e81e77ae3246408dee62f4061e040a9efdcfc9e6f54997e873e244a337091be391a00ca05eec2cd8d5da7b6ecdebf468b76540b5a39
7
+ data.tar.gz: bf2c6174ca48e5ea0d1bf9b7da3e58b01d89016674fa7050473a6dca2dc105d6156281c080e25d5906384a4587b7a51571b51ed9155043e0dee71546fff5ecb7
@@ -3,7 +3,7 @@
3
3
  require 'json'
4
4
 
5
5
  if ARGV.count<1
6
- puts "Usage: #{$0} [-t] [-j] stats1.json"
6
+ puts "Usage: #{$0} [-t] [-j] [-h] stats1.json"
7
7
  exit -1
8
8
  end
9
9
 
@@ -20,6 +20,15 @@ if ARGV[0]=='-j'
20
20
  ARGV.shift
21
21
  end
22
22
 
23
+ time_divider=1
24
+ # print header
25
+ if ARGV[0]=='-h'
26
+ time_divider=3600
27
+ puts "Times are in hours"
28
+ ARGV.shift
29
+ end
30
+
31
+
23
32
 
24
33
  ARGV.each do |file_path|
25
34
  sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
@@ -34,7 +43,7 @@ ARGV.each do |file_path|
34
43
  begin
35
44
  stats.keys.each do |k|
36
45
  if stats[k]['execution_time']
37
- res[k]=stats[k]['execution_time']['total_seconds']
46
+ res[k]=stats[k]['execution_time']['total_seconds'].to_f/time_divider
38
47
  total+=res[k]
39
48
  end
40
49
  end
@@ -48,10 +57,10 @@ ARGV.each do |file_path|
48
57
 
49
58
  if stats['scbi_mapreduce']
50
59
  res['TOTAL_workers']=stats['scbi_mapreduce']['connected_workers']
51
- res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']
52
- res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']
53
- res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']
54
- res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']
60
+ res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']/time_divider
61
+ res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']/time_divider
62
+ res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']/time_divider
63
+ res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']/time_divider
55
64
  end
56
65
 
57
66
  if puts_json
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'json'
4
+
5
+ if ARGV.count<1
6
+ puts "Usage: #{$0} stats1.json [stats2.json stats3.json,...]"
7
+ exit -1
8
+ end
9
+
10
+ # print header
11
+ if ARGV[0]=='-t'
12
+ #heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
13
+ #puts heads.join("\t")
14
+ ARGV.shift
15
+ end
16
+
17
+ contaminants={}
18
+
19
+ ARGV.each do |file_path|
20
+ sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
21
+
22
+ stats=JSON::parse(File.read(file_path))
23
+
24
+ res=[]
25
+ cont=stats['PluginContaminants']['contaminants_ids']
26
+
27
+ limit=60
28
+ cont.keys.sort{|c1,c2| cont[c2].to_i <=> cont[c1].to_i}.each do |k|
29
+ #puts "#{k} => #{cont[k]}"
30
+ contaminants[k]=(contaminants[k] || 0 ) + cont[k]
31
+ limit = limit -1
32
+ break if limit==0
33
+ end
34
+
35
+ end
36
+
37
+ puts JSON::pretty_generate(contaminants)
@@ -10,10 +10,11 @@ end
10
10
  # print header
11
11
  if ARGV[0]=='-t'
12
12
  heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
13
- puts heads.join("\t")
13
+ puts heads.join("\t")
14
14
  ARGV.shift
15
15
  end
16
16
 
17
+
17
18
  ARGV.each do |file_path|
18
19
  sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
19
20
 
data/bin/seqtrimnext CHANGED
@@ -216,7 +216,7 @@ optparse = OptionParser.new do |opts|
216
216
  options[:workers] = Integer(workers)
217
217
  rescue
218
218
  STDERR.puts "ERROR:Invalid workers parameter #{options[:workers]}"
219
- exit
219
+ exit -1
220
220
  end
221
221
 
222
222
  end
@@ -268,7 +268,7 @@ optparse = OptionParser.new do |opts|
268
268
  options[:format] = value
269
269
  if !['sanger','illumina15', 'illumina18'].include?(value)
270
270
  STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
271
- exit
271
+ exit -1
272
272
  end
273
273
  end
274
274
 
@@ -301,7 +301,7 @@ optparse = OptionParser.new do |opts|
301
301
  options[:template] = file
302
302
  end
303
303
 
304
- options[:chunk_size] = 100
304
+ options[:chunk_size] = 5000
305
305
  opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
306
306
  options[:chunk_size] = cs.to_i
307
307
  end
@@ -332,7 +332,7 @@ optparse = OptionParser.new do |opts|
332
332
  opts.on_tail( '-h', '--help', 'Display this screen' ) do
333
333
  puts opts
334
334
  show_additional_help
335
- exit
335
+ exit -1
336
336
  end
337
337
  end
338
338
 
@@ -342,13 +342,13 @@ optparse.parse!
342
342
  if options[:list_db] then
343
343
  # List database entries in a database
344
344
  ListDb.new($DB_PATH,options[:list_db_name])
345
- exit
345
+ exit -1
346
346
  end
347
347
 
348
348
  if options[:gen_params] then
349
349
  # Generates a sample params file in current directory
350
350
  Params.generate_sample_params
351
- exit
351
+ exit -1
352
352
  end
353
353
 
354
354
  #set logger
@@ -453,3 +453,5 @@ else
453
453
 
454
454
  $LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
455
455
  end
456
+
457
+ exit(Seqtrim.exit_status)
data/bin/split_fastq.rb CHANGED
@@ -14,7 +14,7 @@ output_name = ARGV.shift
14
14
  split_by = ARGV.shift.to_i
15
15
 
16
16
  gz_arg=ARGV.shift
17
- gz=false
17
+ gz=''
18
18
 
19
19
  if !gz_arg.nil? and gz_arg.index('-gz')
20
20
  gz='.gz'
@@ -17,6 +17,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
17
17
  @@full_stats={}
18
18
  @@params= params
19
19
  @@exit = false
20
+ @@exit_status=0
20
21
 
21
22
  @@ongoing_stats={}
22
23
  @@ongoing_stats[:sequence_count] = 0
@@ -85,6 +86,10 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
85
86
 
86
87
  end
87
88
 
89
+ def self.exit_status
90
+ return @@exit_status
91
+ end
92
+
88
93
  def self.end_work_manager
89
94
 
90
95
  # if the initial files don't exist, create them
@@ -115,6 +120,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
115
120
 
116
121
  end
117
122
 
123
+ def self.global_error_received(error_exception)
124
+ $LOG.error "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
125
+ @@errors_file.puts "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
126
+ @@errors_file.puts "="*60
127
+ @@exit_status=-1
128
+ SeqtrimWorkManager.controlled_exit
129
+ end
130
+
118
131
  def self.work_manager_finished
119
132
  @@full_stats['scbi_mapreduce']=@@stats
120
133
 
@@ -129,10 +142,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
129
142
  def error_received(worker_error, obj)
130
143
  @@errors_file.puts "Error while processing object #{obj.inspect}\n" + worker_error.original_exception.message + ":\n" +worker_error.original_exception.backtrace.join("\n")
131
144
  @@errors_file.puts "="*60
145
+ @@exit_status=-1
146
+ SeqtrimWorkManager.controlled_exit
147
+
132
148
  end
133
149
 
134
150
  def too_many_errors_received
135
151
  $LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
152
+ @@exit_status=-1
136
153
  end
137
154
 
138
155
  def worker_initial_config
@@ -81,8 +81,8 @@ class SeqtrimWorker < ScbiMapreduce::Worker
81
81
  def starting_worker
82
82
 
83
83
  # $WORKER_LOG.level = Logger::ERROR
84
- $WORKER_LOG.level = Logger::WARN
85
- #$WORKER_LOG.level = Logger::INFO
84
+ #$WORKER_LOG.level = Logger::WARN
85
+ $WORKER_LOG.level = Logger::INFO
86
86
  $WORKER_LOG.info "Loading actions"
87
87
 
88
88
  @action_manager = ActionManager.new
@@ -33,8 +33,18 @@ class Params
33
33
  if !line.empty?
34
34
  if !(line =~ /^\s*#/) # if line is not a comment
35
35
  # extract the parameter's name in params[0] and the parameter's value in params[1]
36
- params = line.split(/\s*=\s*/)
36
+ #params = line.split(/\s*=\s*/)
37
37
 
38
+ # store in the hash the pair key/value, in our case will be name/numeric-value ,
39
+ # that are saved in params[0] and params[1], respectively
40
+ #if (!params[0].nil?) && (!params[1].nil?)
41
+ # set_param(params[0].strip,params[1].strip,comments)
42
+ # comments=[]
43
+ #end
44
+
45
+ line =~ /^\s*([^=]*)\s*=\s*(.*)\s*$/
46
+ params=[$1,$2]
47
+
38
48
  # store in the hash the pair key/value, in our case will be name/numeric-value ,
39
49
  # that are saved in params[0] and params[1], respectively
40
50
  if (!params[0].nil?) && (!params[1].nil?)
@@ -42,7 +52,9 @@ class Params
42
52
  comments=[]
43
53
  end
44
54
 
45
- #$LOG.debug "read: #{params[1]}"
55
+
56
+ $LOG.debug "read: #{params[0]}=#{params[1]}" if !$LOG.nil?
57
+
46
58
  else
47
59
  comments << line.gsub(/^\s*#/,'')
48
60
  end # end if comentario
@@ -15,6 +15,12 @@ require 'action_manager'
15
15
 
16
16
  class Seqtrim
17
17
 
18
+
19
+
20
+ def self.exit_status
21
+ return SeqtrimWorkManager.exit_status
22
+ end
23
+
18
24
  # First of all, reads the file's parameters, which contain the values of all parameters and the 'plugin_list' that specifies the order of execution of the plugins.
19
25
  #
20
26
  # Secondly, loads the plugins in a folder .
@@ -24,7 +30,6 @@ class Seqtrim
24
30
  # After that, creates a thread's pool of a determinate number of workers, e.g. 10 threads,
25
31
  # reads the sequences from files 'fasta' , until now without qualities,
26
32
  # and executes the plugins over the sequences in the pool of threads
27
-
28
33
 
29
34
  def get_custom_cdhit(cd_hit_input_file,params)
30
35
  cmd=''
@@ -136,7 +141,6 @@ class Seqtrim
136
141
  default_value=Seqtrimnext::SEQTRIM_VERSION
137
142
  params.check_param(errors,'seqtrim_version','String',default_value,comment)
138
143
 
139
-
140
144
  if !errors.empty?
141
145
  $LOG.error 'Please, define the following global parameters in params file:'
142
146
  errors.each do |error|
@@ -166,7 +170,7 @@ class Seqtrim
166
170
  if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
167
171
  if !options[:use_checkpoint]
168
172
  STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
169
- exit
173
+ exit(-1)
170
174
  end
171
175
  end
172
176
 
@@ -230,7 +234,7 @@ class Seqtrim
230
234
 
231
235
  $LOG.info "Checking global params"
232
236
  if !check_global_params(params)
233
- exit
237
+ exit(-1)
234
238
  end
235
239
 
236
240
  # Load actions
@@ -253,8 +257,7 @@ class Seqtrim
253
257
 
254
258
  # save used params to file
255
259
  params.save_file('used_params.txt')
256
-
257
- exit
260
+ exit(-1)
258
261
  end
259
262
 
260
263
  if !Dir.exists?(OUTPUT_PATH)
@@ -297,7 +300,7 @@ class Seqtrim
297
300
  params.load_repeated_seqs('clusters.fasta.clstr')
298
301
  else
299
302
  $LOG.error("Exiting due to not found clusters.fasta.clstr. Maybe cd-hit failed. Check cd-hit.out")
300
- exit
303
+ exit(-1)
301
304
  end
302
305
  end
303
306
 
@@ -367,7 +370,12 @@ class Seqtrim
367
370
  sequence_readers.each do |file|
368
371
  file.close
369
372
  end
370
-
373
+
374
+ if SeqtrimWorkManager.exit_status>=0
375
+ $LOG.info "Exit status: #{SeqtrimWorkManager.exit_status}"
376
+ else
377
+ $LOG.error "Exit status: #{SeqtrimWorkManager.exit_status}"
378
+ end
371
379
  $LOG.info 'Closing server'
372
380
  end
373
381
 
@@ -23,12 +23,15 @@ class Plugin
23
23
  t1=Time.now
24
24
  execute(seq)
25
25
  t2=Time.now
26
+
27
+ add_plugin_stats('execution_time','total_seconds',t2-t1)
26
28
  end
27
29
 
28
-
29
- @stats['execution_time']={}
30
+ end
30
31
 
31
- @stats['execution_time']['total_seconds']=t2-t1
32
+ def add_plugin_stats(cat,item,elapsed_time)
33
+ @stats[cat]={} if @stats[cat].nil?
34
+ @stats[cat][item]=elapsed_time
32
35
  end
33
36
 
34
37
  def can_execute?
@@ -40,7 +43,9 @@ class Plugin
40
43
 
41
44
  #Begins the plugin's execution with the sequence "seq"
42
45
  def execute(seqs)
46
+ t1=Time.now
43
47
  blasts=do_blasts(seqs)
48
+
44
49
 
45
50
  if !blasts.empty?
46
51
 
@@ -49,18 +54,24 @@ class Plugin
49
54
  else
50
55
  queries = blasts.querys
51
56
  end
52
-
57
+
58
+ add_plugin_stats('execution_time','blast_and_parse',Time.now-t1)
59
+
60
+ t1=Time.now
53
61
  seqs.each_with_index do |s,i|
54
62
  exec_seq(s,queries[i])
55
63
  end
56
64
 
57
65
  else # there is no blast
58
66
 
67
+ t1=Time.now
59
68
  seqs.each do |s|
60
69
  exec_seq(s,nil)
61
70
  end
62
-
63
71
  end
72
+
73
+ add_plugin_stats('execution_time','exec_seq',Time.now-t1)
74
+
64
75
  end
65
76
 
66
77
  def do_blasts(seqs)
@@ -18,6 +18,9 @@ class PluginAbAdapters < Plugin
18
18
 
19
19
  # find MIDS with less results than max_target_seqs value
20
20
  blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
21
+
22
+ # with culling limit there are situations in which a long hit with 1 mismatch is ignored because there is a shorter one with no errors; this is not acceptable.
23
+ #blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE} -culling_limit=1")
21
24
  $LOG.debug('BLAST:'+blast.get_blast_cmd)
22
25
 
23
26
  fastas=[]
@@ -29,7 +32,32 @@ class PluginAbAdapters < Plugin
29
32
 
30
33
  # fastas=fastas.join("\n")
31
34
 
32
- blast_table_results = blast.do_blast(fastas)
35
+ #blast_table_results = blast.do_blast(fastas)
36
+ #blast_table_results = BlastTableResult.new(blast_table_results)
37
+
38
+
39
+ t1=Time.now
40
+ blast_table_results = blast.do_blast(fastas,:table,false)
41
+ add_plugin_stats('execution_time','blast',Time.now-t1)
42
+
43
+
44
+ #f=File.new("/tmp/salida_#{fastas.first.gsub('>','').gsub('/','_')}.blast",'w+')
45
+ #f.puts blast.get_blast_cmd
46
+ #f.puts blast_table_results
47
+ #f.close
48
+
49
+ t1=Time.now
50
+ blast_table_results = BlastTableResult.new(blast_table_results)
51
+ add_plugin_stats('execution_time','parse',Time.now-t1)
52
+
53
+
54
+ # t1=Time.now
55
+ # blast_table_results = blast.do_blast(fastas,:xml,false)
56
+ # add_plugin_stats('execution_time','blast',Time.now-t1)
57
+
58
+ # t1=Time.now
59
+ # blast_table_results = BlastStreamxmlResult.new(blast_table_results)
60
+ # add_plugin_stats('execution_time','parse',Time.now-t1)
33
61
 
34
62
  # puts blast_table_results.inspect
35
63
 
@@ -27,7 +27,7 @@ class PluginAdapters < Plugin
27
27
 
28
28
  # fastas=fastas.join("\n")
29
29
 
30
- blast_table_results = blast.do_blast(fastas,:xml)
30
+ blast_table_results = blast.do_blast(fastas,:table)
31
31
 
32
32
  # puts blast_table_results.inspect
33
33
 
@@ -23,12 +23,22 @@ class PluginContaminants < Plugin
23
23
  # find MIDS with less results than max_target_seqs value
24
24
  # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
25
25
 
26
+ # This message is due to short sequences
27
+ #Warning: Could not calculate ungapped Karlin-Altschul parameters due to an invalid query sequence or its translation. Please verify the query sequence(s) and/or filtering options
28
+
26
29
  # TODO - Culling limit = 2 because blast fails with this command when it is given cl=1 and dust=no
27
30
  # and a low-complexity sequence as input
28
31
 
29
- blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
32
+ task_template=@params.get_param('blast_task_template_contaminants')
33
+ extra_params=@params.get_param('blast_extra_params_contaminants')
34
+
35
+ extra_params=extra_params.gsub(/^\"|\"?$/, '')
36
+
37
+ #blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
38
+
39
+ blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
30
40
 
31
- $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
41
+ $LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
32
42
 
33
43
  fastas=[]
34
44
 
@@ -43,7 +53,16 @@ class PluginContaminants < Plugin
43
53
  # $LOG.info(fastas)
44
54
  # $LOG.info('-'*20)
45
55
 
46
- blast_table_results = blast.do_blast(fastas,:xml)
56
+ #blast_table_results = blast.do_blast(fastas,:xml)
57
+ t1=Time.now
58
+ #blast_table_results = blast.do_blast(fastas,:xml,false)
59
+ blast_table_results = blast.do_blast(fastas,:table,false)
60
+ add_plugin_stats('execution_time','blast',Time.now-t1)
61
+
62
+ t1=Time.now
63
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
64
+ blast_table_results = BlastTableResult.new(blast_table_results)
65
+ add_plugin_stats('execution_time','parse',Time.now-t1)
47
66
 
48
67
  # $LOG.info(blast_table_results.inspect)
49
68
 
@@ -62,12 +81,14 @@ class PluginContaminants < Plugin
62
81
  return
63
82
  end
64
83
 
84
+ #if blast_query.query_def != seq.seq_name
65
85
  if blast_query.query_id != seq.seq_name
66
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
86
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
67
87
  end
68
88
 
69
89
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
70
90
 
91
+ #add_plugin_stats('hsp_count',seq.seq_name,blast_query.hits.count)
71
92
 
72
93
  #blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
73
94
  # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
@@ -195,11 +216,19 @@ class PluginContaminants < Plugin
195
216
  default_value = 'true'
196
217
  params.check_param(errors,'contaminants_reject','String',default_value,comment)
197
218
 
198
-
199
219
  comment='Path for contaminants database'
200
220
  default_value = File.join($FORMATTED_DB_PATH,'contaminants.fasta')
201
221
  params.check_param(errors,'contaminants_db','DB',default_value,comment)
202
222
 
223
+ comment='Blast task template for contaminations'
224
+ #default_value = 'blastn'
225
+ default_value = 'megablast'
226
+ params.check_param(errors,'blast_task_template_contaminants','String',default_value,comment)
227
+
228
+ comment='Blast extra params for contaminations'
229
+ #default_value = ''
230
+ default_value = '"-word_size=22"'
231
+ params.check_param(errors,'blast_extra_params_contaminants','String',default_value,comment)
203
232
 
204
233
  return errors
205
234
  end
@@ -73,6 +73,7 @@ class PluginLowComplexity < Plugin
73
73
 
74
74
  if !actions.empty?
75
75
  add_stats('low_complexity',total_dust)
76
+ seq.add_file_tag(0, 'low_complexity', :both, 100)
76
77
  seq.add_actions(actions)
77
78
  end
78
79
 
@@ -92,6 +93,7 @@ class PluginLowComplexity < Plugin
92
93
  # default_value = 80
93
94
  # params.check_param(errors,'poly_t_percent','Integer',default_value,comment)
94
95
  #
96
+
95
97
  return errors
96
98
  end
97
99
 
@@ -170,9 +170,10 @@ class PluginLowQuality < Plugin
170
170
  default_value = 20
171
171
  params.check_param(errors,'min_quality','Integer',default_value,comment)
172
172
 
173
- comment='Quality window for scanning low quality segments'
174
- default_value = 15
175
- params.check_param(errors,'window_width','Integer',default_value,comment)
173
+
174
+ #comment='Quality window for scanning low quality segments'
175
+ #default_value = 15
176
+ #params.check_param(errors,'window_width','Integer',default_value,comment)
176
177
 
177
178
 
178
179
  comment='Minimum length of a bad quality segment inside the sequence'
@@ -43,9 +43,14 @@ class PluginUserContaminants < Plugin
43
43
  # TODO - Culling limit = 2 because blast fails with this command when it is given cl=1 and dust=no
44
44
  # and a low-complexity sequence as input
45
45
 
46
- blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
46
+ task_template=@params.get_param('blast_task_template_user_contaminants')
47
+ extra_params=@params.get_param('blast_extra_params_user_contaminants')
47
48
 
48
- $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
49
+ extra_params=extra_params.gsub(/^\"|\"?$/, '')
50
+
51
+ blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
52
+
53
+ $LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
49
54
 
50
55
  fastas=[]
51
56
 
@@ -55,7 +60,16 @@ class PluginUserContaminants < Plugin
55
60
  end
56
61
 
57
62
 
58
- blast_table_results = blast.do_blast(fastas,:xml)
63
+ #blast_table_results = blast.do_blast(fastas,:xml)
64
+ t1=Time.now
65
+ blast_table_results = blast.do_blast(fastas,:table,false)
66
+ add_plugin_stats('execution_time','blast',Time.now-t1)
67
+
68
+ t1=Time.now
69
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
70
+ blast_table_results = BlastTableResult.new(blast_table_results)
71
+ add_plugin_stats('execution_time','parse',Time.now-t1)
72
+
59
73
 
60
74
  return blast_table_results
61
75
  end
@@ -63,7 +77,7 @@ class PluginUserContaminants < Plugin
63
77
 
64
78
  def exec_seq(seq,blast_query)
65
79
  if blast_query.query_id != seq.seq_name
66
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
80
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
67
81
  end
68
82
 
69
83
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"
@@ -144,6 +158,16 @@ class PluginUserContaminants < Plugin
144
158
  default_value = "" #File.join($FORMATTED_DB_PATH,'user_contaminant.fasta')
145
159
  params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
146
160
 
161
+ comment='Blast task template for user contaminations'
162
+ #default_value = 'blastn'
163
+ default_value = 'megablast'
164
+ params.check_param(errors,'blast_task_template_user_contaminants','String',default_value,comment)
165
+
166
+ comment='Blast extra params for user contaminations'
167
+ #default_value = ''
168
+ default_value = '"-word_size=22"'
169
+ params.check_param(errors,'blast_extra_params_user_contaminants','String',default_value,comment)
170
+
147
171
  return errors
148
172
  end
149
173
 
@@ -40,7 +40,17 @@ class PluginVectors < Plugin
40
40
 
41
41
  # fastas=fastas.join("\n")
42
42
 
43
- blast_table_results = blast.do_blast(fastas,:xml)
43
+ #blast_table_results = blast.do_blast(fastas,:xml)
44
+
45
+ t1=Time.now
46
+ blast_table_results = blast.do_blast(fastas,:table,false)
47
+ add_plugin_stats('execution_time','blast',Time.now-t1)
48
+
49
+ t1=Time.now
50
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
51
+ blast_table_results = BlastTableResult.new(blast_table_results)
52
+ add_plugin_stats('execution_time','parse',Time.now-t1)
53
+
44
54
 
45
55
  # puts blast_table_results.inspect
46
56
 
@@ -50,7 +60,7 @@ class PluginVectors < Plugin
50
60
 
51
61
  def exec_seq(seq,blast_query)
52
62
  if blast_query.query_id != seq.seq_name
53
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
63
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
54
64
  end
55
65
 
56
66
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
@@ -1,4 +1,4 @@
1
1
  module Seqtrimnext
2
- VERSION = "2.0.62"
2
+ VERSION = "2.0.66"
3
3
  SEQTRIM_VERSION = VERSION
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.62
4
+ version: 2.0.66
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dario Guerrero
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-20 00:00:00.000000000 Z
12
+ date: 2016-05-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -219,6 +219,7 @@ executables:
219
219
  - resume_clusters.rb
220
220
  - resume_execution_times.rb
221
221
  - resume_rejected.rb
222
+ - resume_stn_contaminants.rb
222
223
  - resume_stn_stats.rb
223
224
  - reverse_paired.rb
224
225
  - seqtrimnext
@@ -252,6 +253,7 @@ files:
252
253
  - bin/resume_clusters.rb
253
254
  - bin/resume_execution_times.rb
254
255
  - bin/resume_rejected.rb
256
+ - bin/resume_stn_contaminants.rb
255
257
  - bin/resume_stn_stats.rb
256
258
  - bin/reverse_paired.rb
257
259
  - bin/seqtrimnext
@@ -380,7 +382,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
380
382
  version: '0'
381
383
  requirements: []
382
384
  rubyforge_project:
383
- rubygems_version: 2.4.4
385
+ rubygems_version: 2.4.8
384
386
  signing_key:
385
387
  specification_version: 4
386
388
  summary: Sequences preprocessing and cleaning software