seqtrimnext 2.0.62 → 2.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1fb6845331e9b46e2a53f29312fce204869a91cf
4
- data.tar.gz: 18d138fe1b899582daae2e4118cd6118853afc29
3
+ metadata.gz: ab74e9c551c43eb30da4a881ab544c9d7754ecaa
4
+ data.tar.gz: acdf2c4e6d4e5d36d5fafbe1e2004178ed33b145
5
5
  SHA512:
6
- metadata.gz: d22ad436ce0b15ff783d15b871491adaa5947f9c1da4a7247712f7d6e9e790978ad90ab22657dc7768b51a61d6122f874c7853e9d0279f0dd408702a681ca5f0
7
- data.tar.gz: 67d4af5e0bb8dc2eba127b6fe1ff04684c7b96db3fbad9e94cb6a222d88d4da3b5b5fd204022f70c0e14d2511ae476a7814f3ec32d21900c73f2370462207faa
6
+ metadata.gz: d60d9cb31be6eab17ff47e81e77ae3246408dee62f4061e040a9efdcfc9e6f54997e873e244a337091be391a00ca05eec2cd8d5da7b6ecdebf468b76540b5a39
7
+ data.tar.gz: bf2c6174ca48e5ea0d1bf9b7da3e58b01d89016674fa7050473a6dca2dc105d6156281c080e25d5906384a4587b7a51571b51ed9155043e0dee71546fff5ecb7
@@ -3,7 +3,7 @@
3
3
  require 'json'
4
4
 
5
5
  if ARGV.count<1
6
- puts "Usage: #{$0} [-t] [-j] stats1.json"
6
+ puts "Usage: #{$0} [-t] [-j] [-h] stats1.json"
7
7
  exit -1
8
8
  end
9
9
 
@@ -20,6 +20,15 @@ if ARGV[0]=='-j'
20
20
  ARGV.shift
21
21
  end
22
22
 
23
+ time_divider=1
24
+ # print header
25
+ if ARGV[0]=='-h'
26
+ time_divider=3600
27
+ puts "Times are in hours"
28
+ ARGV.shift
29
+ end
30
+
31
+
23
32
 
24
33
  ARGV.each do |file_path|
25
34
  sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
@@ -34,7 +43,7 @@ ARGV.each do |file_path|
34
43
  begin
35
44
  stats.keys.each do |k|
36
45
  if stats[k]['execution_time']
37
- res[k]=stats[k]['execution_time']['total_seconds']
46
+ res[k]=stats[k]['execution_time']['total_seconds'].to_f/time_divider
38
47
  total+=res[k]
39
48
  end
40
49
  end
@@ -48,10 +57,10 @@ ARGV.each do |file_path|
48
57
 
49
58
  if stats['scbi_mapreduce']
50
59
  res['TOTAL_workers']=stats['scbi_mapreduce']['connected_workers']
51
- res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']
52
- res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']
53
- res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']
54
- res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']
60
+ res['TOTAL_read']=stats['scbi_mapreduce']['total_read_time']/time_divider
61
+ res['TOTAL_write']=stats['scbi_mapreduce']['total_write_time']/time_divider
62
+ res['TOTAL_manager_idle']=stats['scbi_mapreduce']['total_manager_idle_time']/time_divider
63
+ res['TOTAL_execution']=stats['scbi_mapreduce']['total_seconds']/time_divider
55
64
  end
56
65
 
57
66
  if puts_json
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'json'
4
+
5
+ if ARGV.count<1
6
+ puts "Usage: #{$0} stats1.json [stats2.json stats3.json,...]"
7
+ exit -1
8
+ end
9
+
10
+ # print header
11
+ if ARGV[0]=='-t'
12
+ #heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
13
+ #puts heads.join("\t")
14
+ ARGV.shift
15
+ end
16
+
17
+ contaminants={}
18
+
19
+ ARGV.each do |file_path|
20
+ sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
21
+
22
+ stats=JSON::parse(File.read(file_path))
23
+
24
+ res=[]
25
+ cont=stats['PluginContaminants']['contaminants_ids']
26
+
27
+ limit=60
28
+ cont.keys.sort{|c1,c2| cont[c2].to_i <=> cont[c1].to_i}.each do |k|
29
+ #puts "#{k} => #{cont[k]}"
30
+ contaminants[k]=(contaminants[k] || 0 ) + cont[k]
31
+ limit = limit -1
32
+ break if limit==0
33
+ end
34
+
35
+ end
36
+
37
+ puts JSON::pretty_generate(contaminants)
@@ -10,10 +10,11 @@ end
10
10
  # print header
11
11
  if ARGV[0]=='-t'
12
12
  heads=['sample_name','input_count','sequence_count_paired','sequence_count_single','rejected','rejected_percent']
13
- puts heads.join("\t")
13
+ puts heads.join("\t")
14
14
  ARGV.shift
15
15
  end
16
16
 
17
+
17
18
  ARGV.each do |file_path|
18
19
  sample_name = File.basename(File.expand_path(File.join(file_path,'..','..')))
19
20
 
data/bin/seqtrimnext CHANGED
@@ -216,7 +216,7 @@ optparse = OptionParser.new do |opts|
216
216
  options[:workers] = Integer(workers)
217
217
  rescue
218
218
  STDERR.puts "ERROR:Invalid workers parameter #{options[:workers]}"
219
- exit
219
+ exit -1
220
220
  end
221
221
 
222
222
  end
@@ -268,7 +268,7 @@ optparse = OptionParser.new do |opts|
268
268
  options[:format] = value
269
269
  if !['sanger','illumina15', 'illumina18'].include?(value)
270
270
  STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
271
- exit
271
+ exit -1
272
272
  end
273
273
  end
274
274
 
@@ -301,7 +301,7 @@ optparse = OptionParser.new do |opts|
301
301
  options[:template] = file
302
302
  end
303
303
 
304
- options[:chunk_size] = 100
304
+ options[:chunk_size] = 5000
305
305
  opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
306
306
  options[:chunk_size] = cs.to_i
307
307
  end
@@ -332,7 +332,7 @@ optparse = OptionParser.new do |opts|
332
332
  opts.on_tail( '-h', '--help', 'Display this screen' ) do
333
333
  puts opts
334
334
  show_additional_help
335
- exit
335
+ exit -1
336
336
  end
337
337
  end
338
338
 
@@ -342,13 +342,13 @@ optparse.parse!
342
342
  if options[:list_db] then
343
343
  # List database entries in a database
344
344
  ListDb.new($DB_PATH,options[:list_db_name])
345
- exit
345
+ exit -1
346
346
  end
347
347
 
348
348
  if options[:gen_params] then
349
349
  # Generates a sample params file in current directory
350
350
  Params.generate_sample_params
351
- exit
351
+ exit -1
352
352
  end
353
353
 
354
354
  #set logger
@@ -453,3 +453,5 @@ else
453
453
 
454
454
  $LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
455
455
  end
456
+
457
+ exit(Seqtrim.exit_status)
data/bin/split_fastq.rb CHANGED
@@ -14,7 +14,7 @@ output_name = ARGV.shift
14
14
  split_by = ARGV.shift.to_i
15
15
 
16
16
  gz_arg=ARGV.shift
17
- gz=false
17
+ gz=''
18
18
 
19
19
  if !gz_arg.nil? and gz_arg.index('-gz')
20
20
  gz='.gz'
@@ -17,6 +17,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
17
17
  @@full_stats={}
18
18
  @@params= params
19
19
  @@exit = false
20
+ @@exit_status=0
20
21
 
21
22
  @@ongoing_stats={}
22
23
  @@ongoing_stats[:sequence_count] = 0
@@ -85,6 +86,10 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
85
86
 
86
87
  end
87
88
 
89
+ def self.exit_status
90
+ return @@exit_status
91
+ end
92
+
88
93
  def self.end_work_manager
89
94
 
90
95
  # if initial files doesn't exists, create it
@@ -115,6 +120,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
115
120
 
116
121
  end
117
122
 
123
+ def self.global_error_received(error_exception)
124
+ $LOG.error "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
125
+ @@errors_file.puts "Global error:\n" + error_exception.message + ":\n" +error_exception.backtrace.join("\n")
126
+ @@errors_file.puts "="*60
127
+ @@exit_status=-1
128
+ SeqtrimWorkManager.controlled_exit
129
+ end
130
+
118
131
  def self.work_manager_finished
119
132
  @@full_stats['scbi_mapreduce']=@@stats
120
133
 
@@ -129,10 +142,14 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
129
142
  def error_received(worker_error, obj)
130
143
  @@errors_file.puts "Error while processing object #{obj.inspect}\n" + worker_error.original_exception.message + ":\n" +worker_error.original_exception.backtrace.join("\n")
131
144
  @@errors_file.puts "="*60
145
+ @@exit_status=-1
146
+ SeqtrimWorkManager.controlled_exit
147
+
132
148
  end
133
149
 
134
150
  def too_many_errors_received
135
151
  $LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
152
+ @@exit_status=-1
136
153
  end
137
154
 
138
155
  def worker_initial_config
@@ -81,8 +81,8 @@ class SeqtrimWorker < ScbiMapreduce::Worker
81
81
  def starting_worker
82
82
 
83
83
  # $WORKER_LOG.level = Logger::ERROR
84
- $WORKER_LOG.level = Logger::WARN
85
- #$WORKER_LOG.level = Logger::INFO
84
+ #$WORKER_LOG.level = Logger::WARN
85
+ $WORKER_LOG.level = Logger::INFO
86
86
  $WORKER_LOG.info "Loading actions"
87
87
 
88
88
  @action_manager = ActionManager.new
@@ -33,8 +33,18 @@ class Params
33
33
  if !line.empty?
34
34
  if !(line =~ /^\s*#/) # if line is not a comment
35
35
  # extract the parameter's name in params[0] and the parameter's value in params[1]
36
- params = line.split(/\s*=\s*/)
36
+ #params = line.split(/\s*=\s*/)
37
37
 
38
+ # store in the hash the pair key/value, in our case will be name/numeric-value ,
39
+ # that are save in params[0] and params[1], respectively
40
+ #if (!params[0].nil?) && (!params[1].nil?)
41
+ # set_param(params[0].strip,params[1].strip,comments)
42
+ # comments=[]
43
+ #end
44
+
45
+ line =~ /^\s*([^=]*)\s*=\s*(.*)\s*$/
46
+ params=[$1,$2]
47
+
38
48
  # store in the hash the pair key/value, in our case will be name/numeric-value ,
39
49
  # that are save in params[0] and params[1], respectively
40
50
  if (!params[0].nil?) && (!params[1].nil?)
@@ -42,7 +52,9 @@ class Params
42
52
  comments=[]
43
53
  end
44
54
 
45
- #$LOG.debug "read: #{params[1]}"
55
+
56
+ $LOG.debug "read: #{params[0]}=#{params[1]}" if !$LOG.nil?
57
+
46
58
  else
47
59
  comments << line.gsub(/^\s*#/,'')
48
60
  end # end if comentario
@@ -15,6 +15,12 @@ require 'action_manager'
15
15
 
16
16
  class Seqtrim
17
17
 
18
+
19
+
20
+ def self.exit_status
21
+ return SeqtrimWorkManager.exit_status
22
+ end
23
+
18
24
  # First of all, reads the file's parameters, where are the values of all parameters and the 'plugin_list' that specifies the order of execution from the plugins.
19
25
  #
20
26
  # Secondly, loads the plugins in a folder .
@@ -24,7 +30,6 @@ class Seqtrim
24
30
  # After that, creates a thread's pool of a determinate number of workers, e.g. 10 threads,
25
31
  # reads the sequences from files 'fasta' , until now without qualities,
26
32
  # and executes the plugins over the sequences in the pool of threads
27
-
28
33
 
29
34
  def get_custom_cdhit(cd_hit_input_file,params)
30
35
  cmd=''
@@ -136,7 +141,6 @@ class Seqtrim
136
141
  default_value=Seqtrimnext::SEQTRIM_VERSION
137
142
  params.check_param(errors,'seqtrim_version','String',default_value,comment)
138
143
 
139
-
140
144
  if !errors.empty?
141
145
  $LOG.error 'Please, define the following global parameters in params file:'
142
146
  errors.each do |error|
@@ -166,7 +170,7 @@ class Seqtrim
166
170
  if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
167
171
  if !options[:use_checkpoint]
168
172
  STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
169
- exit
173
+ exit(-1)
170
174
  end
171
175
  end
172
176
 
@@ -230,7 +234,7 @@ class Seqtrim
230
234
 
231
235
  $LOG.info "Checking global params"
232
236
  if !check_global_params(params)
233
- exit
237
+ exit(-1)
234
238
  end
235
239
 
236
240
  # Load actions
@@ -253,8 +257,7 @@ class Seqtrim
253
257
 
254
258
  # save used params to file
255
259
  params.save_file('used_params.txt')
256
-
257
- exit
260
+ exit(-1)
258
261
  end
259
262
 
260
263
  if !Dir.exists?(OUTPUT_PATH)
@@ -297,7 +300,7 @@ class Seqtrim
297
300
  params.load_repeated_seqs('clusters.fasta.clstr')
298
301
  else
299
302
  $LOG.error("Exiting due to not found clusters.fasta.clstr. Maybe cd-hit failed. Check cd-hit.out")
300
- exit
303
+ exit(-1)
301
304
  end
302
305
  end
303
306
 
@@ -367,7 +370,12 @@ class Seqtrim
367
370
  sequence_readers.each do |file|
368
371
  file.close
369
372
  end
370
-
373
+
374
+ if SeqtrimWorkManager.exit_status>=0
375
+ $LOG.info "Exit status: #{SeqtrimWorkManager.exit_status}"
376
+ else
377
+ $LOG.error "Exit status: #{SeqtrimWorkManager.exit_status}"
378
+ end
371
379
  $LOG.info 'Closing server'
372
380
  end
373
381
 
@@ -23,12 +23,15 @@ class Plugin
23
23
  t1=Time.now
24
24
  execute(seq)
25
25
  t2=Time.now
26
+
27
+ add_plugin_stats('execution_time','total_seconds',t2-t1)
26
28
  end
27
29
 
28
-
29
- @stats['execution_time']={}
30
+ end
30
31
 
31
- @stats['execution_time']['total_seconds']=t2-t1
32
+ def add_plugin_stats(cat,item,elapsed_time)
33
+ @stats[cat]={} if @stats[cat].nil?
34
+ @stats[cat][item]=elapsed_time
32
35
  end
33
36
 
34
37
  def can_execute?
@@ -40,7 +43,9 @@ class Plugin
40
43
 
41
44
  #Begins the plugin's execution whit the sequence "seq"
42
45
  def execute(seqs)
46
+ t1=Time.now
43
47
  blasts=do_blasts(seqs)
48
+
44
49
 
45
50
  if !blasts.empty?
46
51
 
@@ -49,18 +54,24 @@ class Plugin
49
54
  else
50
55
  queries = blasts.querys
51
56
  end
52
-
57
+
58
+ add_plugin_stats('execution_time','blast_and_parse',Time.now-t1)
59
+
60
+ t1=Time.now
53
61
  seqs.each_with_index do |s,i|
54
62
  exec_seq(s,queries[i])
55
63
  end
56
64
 
57
65
  else # there is no blast
58
66
 
67
+ t1=Time.now
59
68
  seqs.each do |s|
60
69
  exec_seq(s,nil)
61
70
  end
62
-
63
71
  end
72
+
73
+ add_plugin_stats('execution_time','exec_seq',Time.now-t1)
74
+
64
75
  end
65
76
 
66
77
  def do_blasts(seqs)
@@ -18,6 +18,9 @@ class PluginAbAdapters < Plugin
18
18
 
19
19
  # find MIDS with less results than max_target_seqs value
20
20
  blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
21
+
22
+ # con culling limit hay situaciones en las que un hit largo con 1 mismatch es ignorado porque hay otro más corto que no tiene ningun error, no es aceptable.
23
+ #blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE} -culling_limit=1")
21
24
  $LOG.debug('BLAST:'+blast.get_blast_cmd)
22
25
 
23
26
  fastas=[]
@@ -29,7 +32,32 @@ class PluginAbAdapters < Plugin
29
32
 
30
33
  # fastas=fastas.join("\n")
31
34
 
32
- blast_table_results = blast.do_blast(fastas)
35
+ #blast_table_results = blast.do_blast(fastas)
36
+ #blast_table_results = BlastTableResult.new(blast_table_results)
37
+
38
+
39
+ t1=Time.now
40
+ blast_table_results = blast.do_blast(fastas,:table,false)
41
+ add_plugin_stats('execution_time','blast',Time.now-t1)
42
+
43
+
44
+ #f=File.new("/tmp/salida_#{fastas.first.gsub('>','').gsub('/','_')}.blast",'w+')
45
+ #f.puts blast.get_blast_cmd
46
+ #f.puts blast_table_results
47
+ #f.close
48
+
49
+ t1=Time.now
50
+ blast_table_results = BlastTableResult.new(blast_table_results)
51
+ add_plugin_stats('execution_time','parse',Time.now-t1)
52
+
53
+
54
+ # t1=Time.now
55
+ # blast_table_results = blast.do_blast(fastas,:xml,false)
56
+ # add_plugin_stats('execution_time','blast',Time.now-t1)
57
+
58
+ # t1=Time.now
59
+ # blast_table_results = BlastStreamxmlResult.new(blast_table_results)
60
+ # add_plugin_stats('execution_time','parse',Time.now-t1)
33
61
 
34
62
  # puts blast_table_results.inspect
35
63
 
@@ -27,7 +27,7 @@ class PluginAdapters < Plugin
27
27
 
28
28
  # fastas=fastas.join("\n")
29
29
 
30
- blast_table_results = blast.do_blast(fastas,:xml)
30
+ blast_table_results = blast.do_blast(fastas,:table)
31
31
 
32
32
  # puts blast_table_results.inspect
33
33
 
@@ -23,12 +23,22 @@ class PluginContaminants < Plugin
23
23
  # find MIDS with less results than max_target_seqs value
24
24
  # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
25
25
 
26
+ # This message is due to short sequences
27
+ #Warning: Could not calculate ungapped Karlin-Altschul parameters due to an invalid query sequence or its translation. Please verify the query sequence(s) and/or filtering options
28
+
26
29
  # TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
27
30
  # y una secuencia de baja complejidad como entrada
28
31
 
29
- blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
32
+ task_template=@params.get_param('blast_task_template_contaminants')
33
+ extra_params=@params.get_param('blast_extra_params_contaminants')
34
+
35
+ extra_params=extra_params.gsub(/^\"|\"?$/, '')
36
+
37
+ #blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
38
+
39
+ blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
30
40
 
31
- $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
41
+ $LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
32
42
 
33
43
  fastas=[]
34
44
 
@@ -43,7 +53,16 @@ class PluginContaminants < Plugin
43
53
  # $LOG.info(fastas)
44
54
  # $LOG.info('-'*20)
45
55
 
46
- blast_table_results = blast.do_blast(fastas,:xml)
56
+ #blast_table_results = blast.do_blast(fastas,:xml)
57
+ t1=Time.now
58
+ #blast_table_results = blast.do_blast(fastas,:xml,false)
59
+ blast_table_results = blast.do_blast(fastas,:table,false)
60
+ add_plugin_stats('execution_time','blast',Time.now-t1)
61
+
62
+ t1=Time.now
63
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
64
+ blast_table_results = BlastTableResult.new(blast_table_results)
65
+ add_plugin_stats('execution_time','parse',Time.now-t1)
47
66
 
48
67
  # $LOG.info(blast_table_results.inspect)
49
68
 
@@ -62,12 +81,14 @@ class PluginContaminants < Plugin
62
81
  return
63
82
  end
64
83
 
84
+ #if blast_query.query_def != seq.seq_name
65
85
  if blast_query.query_id != seq.seq_name
66
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
86
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
67
87
  end
68
88
 
69
89
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
70
90
 
91
+ #add_plugin_stats('hsp_count',seq.seq_name,blast_query.hits.count)
71
92
 
72
93
  #blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
73
94
  # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
@@ -195,11 +216,19 @@ class PluginContaminants < Plugin
195
216
  default_value = 'true'
196
217
  params.check_param(errors,'contaminants_reject','String',default_value,comment)
197
218
 
198
-
199
219
  comment='Path for contaminants database'
200
220
  default_value = File.join($FORMATTED_DB_PATH,'contaminants.fasta')
201
221
  params.check_param(errors,'contaminants_db','DB',default_value,comment)
202
222
 
223
+ comment='Blast task template for contaminations'
224
+ #default_value = 'blastn'
225
+ default_value = 'megablast'
226
+ params.check_param(errors,'blast_task_template_contaminants','String',default_value,comment)
227
+
228
+ comment='Blast extra params for contaminations'
229
+ #default_value = ''
230
+ default_value = '"-word_size=22"'
231
+ params.check_param(errors,'blast_extra_params_contaminants','String',default_value,comment)
203
232
 
204
233
  return errors
205
234
  end
@@ -73,6 +73,7 @@ class PluginLowComplexity < Plugin
73
73
 
74
74
  if !actions.empty?
75
75
  add_stats('low_complexity',total_dust)
76
+ seq.add_file_tag(0, 'low_complexity', :both, 100)
76
77
  seq.add_actions(actions)
77
78
  end
78
79
 
@@ -92,6 +93,7 @@ class PluginLowComplexity < Plugin
92
93
  # default_value = 80
93
94
  # params.check_param(errors,'poly_t_percent','Integer',default_value,comment)
94
95
  #
96
+
95
97
  return errors
96
98
  end
97
99
 
@@ -170,9 +170,10 @@ class PluginLowQuality < Plugin
170
170
  default_value = 20
171
171
  params.check_param(errors,'min_quality','Integer',default_value,comment)
172
172
 
173
- comment='Quality window for scanning low quality segments'
174
- default_value = 15
175
- params.check_param(errors,'window_width','Integer',default_value,comment)
173
+
174
+ #comment='Quality window for scanning low quality segments'
175
+ #default_value = 15
176
+ #params.check_param(errors,'window_width','Integer',default_value,comment)
176
177
 
177
178
 
178
179
  comment='Minimum length of a bad quality segment inside the sequence'
@@ -43,9 +43,14 @@ class PluginUserContaminants < Plugin
43
43
  # TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
44
44
  # y una secuencia de baja complejidad como entrada
45
45
 
46
- blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
46
+ task_template=@params.get_param('blast_task_template_user_contaminants')
47
+ extra_params=@params.get_param('blast_extra_params_user_contaminants')
47
48
 
48
- $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
49
+ extra_params=extra_params.gsub(/^\"|\"?$/, '')
50
+
51
+ blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task #{task_template} #{extra_params} -evalue #{@params.get_param('blast_evalue_user_contaminant')} -perc_identity #{@params.get_param('blast_percent_user_contaminant')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
52
+
53
+ $LOG.debug('BLAST:'+blast.get_blast_cmd(:table))
49
54
 
50
55
  fastas=[]
51
56
 
@@ -55,7 +60,16 @@ class PluginUserContaminants < Plugin
55
60
  end
56
61
 
57
62
 
58
- blast_table_results = blast.do_blast(fastas,:xml)
63
+ #blast_table_results = blast.do_blast(fastas,:xml)
64
+ t1=Time.now
65
+ blast_table_results = blast.do_blast(fastas,:table,false)
66
+ add_plugin_stats('execution_time','blast',Time.now-t1)
67
+
68
+ t1=Time.now
69
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
70
+ blast_table_results = BlastTableResult.new(blast_table_results)
71
+ add_plugin_stats('execution_time','parse',Time.now-t1)
72
+
59
73
 
60
74
  return blast_table_results
61
75
  end
@@ -63,7 +77,7 @@ class PluginUserContaminants < Plugin
63
77
 
64
78
  def exec_seq(seq,blast_query)
65
79
  if blast_query.query_id != seq.seq_name
66
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
80
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
67
81
  end
68
82
 
69
83
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"
@@ -144,6 +158,16 @@ class PluginUserContaminants < Plugin
144
158
  default_value = "" #File.join($FORMATTED_DB_PATH,'user_contaminant.fasta')
145
159
  params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
146
160
 
161
+ comment='Blast task template for user contaminations'
162
+ #default_value = 'blastn'
163
+ default_value = 'megablast'
164
+ params.check_param(errors,'blast_task_template_user_contaminants','String',default_value,comment)
165
+
166
+ comment='Blast extra params for user contaminations'
167
+ #default_value = ''
168
+ default_value = '"-word_size=22"'
169
+ params.check_param(errors,'blast_extra_params_user_contaminants','String',default_value,comment)
170
+
147
171
  return errors
148
172
  end
149
173
 
@@ -40,7 +40,17 @@ class PluginVectors < Plugin
40
40
 
41
41
  # fastas=fastas.join("\n")
42
42
 
43
- blast_table_results = blast.do_blast(fastas,:xml)
43
+ #blast_table_results = blast.do_blast(fastas,:xml)
44
+
45
+ t1=Time.now
46
+ blast_table_results = blast.do_blast(fastas,:table,false)
47
+ add_plugin_stats('execution_time','blast',Time.now-t1)
48
+
49
+ t1=Time.now
50
+ #blast_table_results = BlastStreamxmlResult.new(blast_table_results)
51
+ blast_table_results = BlastTableResult.new(blast_table_results)
52
+ add_plugin_stats('execution_time','parse',Time.now-t1)
53
+
44
54
 
45
55
  # puts blast_table_results.inspect
46
56
 
@@ -50,7 +60,7 @@ class PluginVectors < Plugin
50
60
 
51
61
  def exec_seq(seq,blast_query)
52
62
  if blast_query.query_id != seq.seq_name
53
- # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
63
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
54
64
  end
55
65
 
56
66
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
@@ -1,4 +1,4 @@
1
1
  module Seqtrimnext
2
- VERSION = "2.0.62"
2
+ VERSION = "2.0.66"
3
3
  SEQTRIM_VERSION = VERSION
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.62
4
+ version: 2.0.66
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dario Guerrero
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-20 00:00:00.000000000 Z
12
+ date: 2016-05-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -219,6 +219,7 @@ executables:
219
219
  - resume_clusters.rb
220
220
  - resume_execution_times.rb
221
221
  - resume_rejected.rb
222
+ - resume_stn_contaminants.rb
222
223
  - resume_stn_stats.rb
223
224
  - reverse_paired.rb
224
225
  - seqtrimnext
@@ -252,6 +253,7 @@ files:
252
253
  - bin/resume_clusters.rb
253
254
  - bin/resume_execution_times.rb
254
255
  - bin/resume_rejected.rb
256
+ - bin/resume_stn_contaminants.rb
255
257
  - bin/resume_stn_stats.rb
256
258
  - bin/reverse_paired.rb
257
259
  - bin/seqtrimnext
@@ -380,7 +382,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
380
382
  version: '0'
381
383
  requirements: []
382
384
  rubyforge_project:
383
- rubygems_version: 2.4.4
385
+ rubygems_version: 2.4.8
384
386
  signing_key:
385
387
  specification_version: 4
386
388
  summary: Sequences preprocessing and cleaning software