seqtrimnext 2.0.59 → 2.0.60

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3858c7d16c231f2bd9a8a20a79e71793fa4a26b
4
+ data.tar.gz: 8737fb4a873639ccf4784fa0b0f4add4778df20f
5
+ SHA512:
6
+ metadata.gz: 8885fc85c63703652371ecc1a4bc67670de700b8151ba0cf3e3e706904b979a4eeb777b289c2553b229832a556887572c0c8760978f7f6306b099a085a850ab1
7
+ data.tar.gz: a9c3c18e94a240720afd44840ba297259ec888c28381ec4855eec68b38dba3387b76df33c5211b2a91f9fec5d23e03645b792ae4fa71e8c82ea36dfc965e2e1e
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in seqtrimnext.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
File without changes
data/Rakefile CHANGED
@@ -1,39 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/seqtrimnext'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'seqtrimnext' do
14
- self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
18
-
19
- self.extra_deps = []
20
- self.extra_deps << ['narray','>=0']
21
- self.extra_deps << ['gnuplot','>=0']
22
- self.extra_deps << ['term-ansicolor','>=1.0.5']
23
- self.extra_deps << ['xml-simple','>=1.0.12']
24
- self.extra_deps << ['scbi_blast','>=0.0.34']
25
- self.extra_deps << ['scbi_mapreduce','>=0.0.38']
26
- self.extra_deps << ['scbi_fasta','>=0.1.7']
27
- self.extra_deps << ['scbi_fastq','>=0.0.18']
28
- self.extra_deps << ['scbi_plot','>=0.0.6']
29
- self.extra_deps << ['scbi_math','>=0.0.1']
30
- self.extra_deps << ['scbi_headers','>=0.0.2']
31
-
32
- end
33
-
34
- require 'newgem/tasks'
35
- Dir['tasks/**/*.rake'].each { |t| load t }
36
-
37
- # TODO - want other tests/tasks run by default? Add them to the list
38
- # remove_task :default
39
- # task :default => [:spec, :features, :redocs]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.pattern = "test/*_test.rb"
8
+ end
data/bin/seqtrimnext CHANGED
@@ -119,7 +119,7 @@ require 'install_requirements'
119
119
 
120
120
  ins = InstallRequirements.new
121
121
  if (!ins.check_install_requirements)
122
- exit
122
+ exit(-1)
123
123
  end
124
124
 
125
125
  require "logger"
@@ -378,7 +378,7 @@ end
378
378
 
379
379
  if !File.exists?($FORMATTED_DB_PATH)
380
380
  STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
381
- exit
381
+ exit(-1)
382
382
  end
383
383
 
384
384
 
@@ -395,7 +395,7 @@ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
395
395
  puts "You must provide all required options"
396
396
  puts ""
397
397
  puts optparse.help
398
- exit
398
+ exit(-1)
399
399
  end
400
400
 
401
401
  # check for template
@@ -405,7 +405,7 @@ if (!File.exists?(options[:template]))
405
405
  else
406
406
  $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
407
407
  puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
408
- exit
408
+ exit(-1)
409
409
  end
410
410
  end
411
411
  $LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
@@ -418,7 +418,7 @@ if options[:fastq]
418
418
  # fastq file
419
419
  if (!fastq_file.nil? && fastq_file!='-' && !File.exists?(File.expand_path(fastq_file)))
420
420
  $LOG.error "Input file: #{fastq_file} doesn't exists"
421
- exit
421
+ exit(-1)
422
422
  end
423
423
  end
424
424
  end
@@ -426,13 +426,13 @@ end
426
426
  # fasta file
427
427
  if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
428
428
  $LOG.error "Input file: #{options[:fasta]} doesn't exists"
429
- exit
429
+ exit(-1)
430
430
  end
431
431
 
432
432
  # qual file
433
433
  if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
434
434
  $LOG.error "Input file: #{options[:qual]} doesn't exists"
435
- exit
435
+ exit(-1)
436
436
  end
437
437
 
438
438
  s = Seqtrim.new(options)
@@ -19,7 +19,7 @@
19
19
  #
20
20
  # $: << File.expand_path(ROOT_PATH)
21
21
 
22
- $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
22
+ # $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
23
23
  # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
24
24
 
25
25
  require 'seqtrimnext'
@@ -196,7 +196,7 @@ class ExtractStats
196
196
 
197
197
  # sequence min size, is taken directly from params file
198
198
  # max sequence limit is calculated here
199
- if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences')=='true')
199
+ if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences').to_s=='true')
200
200
 
201
201
  # high size limit is calculated with stats
202
202
  @params.set_param('max_sequence_size_raw',(@sequence_lengths_stats.max+10).to_i)
@@ -262,7 +262,7 @@ class Seqtrim
262
262
  end
263
263
 
264
264
  # Extract global stats
265
- if params.get_param('generate_initial_stats')=='true'
265
+ if params.get_param('generate_initial_stats').to_s=='true'
266
266
  $LOG.info "Calculatings stats"
267
267
  ExtractStats.new(sequence_readers,params)
268
268
  else
@@ -273,7 +273,7 @@ class Seqtrim
273
273
  # save used params to file
274
274
  params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
275
275
 
276
- piro_on = (params.get_param('next_generation_sequences')=='true')
276
+ piro_on = (params.get_param('next_generation_sequences').to_s=='true')
277
277
 
278
278
  params.load_mids(params.get_param('mids_db'))
279
279
  params.load_ab_adapters(params.get_param('adapters_ab_db'))
@@ -281,7 +281,7 @@ class Seqtrim
281
281
  params.load_linkers(params.get_param('linkers_db'))
282
282
 
283
283
  #execute cd-hit
284
- if params.get_param('remove_clonality')=='true'
284
+ if params.get_param('remove_clonality').to_s=='true'
285
285
  cmd=get_custom_cdhit(cd_hit_input_file,params)
286
286
  if cmd.empty?
287
287
  cmd=get_cd_hit_cmd(cd_hit_input_file,workers,$SEQTRIMNEXT_INIT)
Binary file
@@ -154,7 +154,7 @@ class PluginContaminants < Plugin
154
154
 
155
155
 
156
156
 
157
- if ((not actions.empty? ) && (reject=='true'))
157
+ if ((not actions.empty? ) && (reject.to_s=='true'))
158
158
  #reject sequence
159
159
  # puts "SEQ_REJECTED= TRUE >>> "
160
160
  seq.seq_rejected=true
@@ -100,7 +100,7 @@ class PluginIndeterminations < Plugin
100
100
  a.right_action=true
101
101
  actions.push a
102
102
 
103
- if @params.get_param('middle_indetermination_rejects')=='true'
103
+ if @params.get_param('middle_indetermination_rejects').to_s=='true'
104
104
  seq.seq_rejected=true
105
105
  seq.seq_rejected_by_message='Indeterminations in middle of sequence'
106
106
  end
@@ -119,7 +119,7 @@ class PluginLowQuality < Plugin
119
119
 
120
120
  if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
121
121
  $LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
122
- elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual')=='true'))
122
+ elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual').to_s=='true'))
123
123
 
124
124
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
125
125
 
@@ -101,7 +101,7 @@ class PluginVectors < Plugin
101
101
  # puts " near #{near_to_extrem(v,seq,min_vector_size)} #{vector_size}>=#{min_vector_size}"
102
102
  #c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1) #if ab adapter is very near to the end of original sequence
103
103
 
104
- piro_on=@params.get_param('next_generation_sequences')
104
+ piro_on=@params.get_param('next_generation_sequences').to_s
105
105
 
106
106
  if (((piro_on=='true') && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionLinker)) && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionMultipleLinker))) || # if vectors DB not is contained inside detected linkers
107
107
  (piro_on=='false'))
@@ -110,7 +110,7 @@ class PluginVectors < Plugin
110
110
  if !near_to_extrem(v,seq,min_vector_size)
111
111
  type = 'ActionUnexpectedVector'
112
112
 
113
- if @params.get_param('middle_vector_rejects')=='true'
113
+ if @params.get_param('middle_vector_rejects').to_s=='true'
114
114
  seq.seq_rejected=true
115
115
  seq.seq_rejected_by_message='unexpected vector'
116
116
  end
@@ -0,0 +1,156 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAdapters
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAdaptersOld < Plugin
11
+
12
+ def get_type_adapter(p_start,p_end,seq)
13
+ #if q_beg is nearer the left, add adapter action by the left,
14
+ #if q_end is nearer the right, add adapter action by the right
15
+ #NOTE: If the adapter is very near to both the left and the right,
16
+ #then the sequence isn't valid, because almost the whole sequence is adapter.
17
+
18
+
19
+ v1= p_end.to_i
20
+ v2= p_start.to_i
21
+
22
+ # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
23
+
24
+ # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
25
+ if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
26
+ type = "ActionLeftAdapter"
27
+
28
+ else
29
+ type = "ActionRightAdapter"
30
+
31
+ end
32
+ return type
33
+ end
34
+
35
+
36
+ def cut_by_right(adapter,seq)
37
+
38
+ left_size = adapter.q_beg-seq.insert_start+1
39
+ right_size = seq.insert_end-adapter.q_end+1
40
+ left_size=0 if (left_size<0)
41
+ right_size=0 if (right_size<0)
42
+
43
+ return (left_size>(right_size/2).to_i)
44
+
45
+ end
46
+
47
+ def do_blasts(seqs)
48
+ # find MIDS with less results than max_target_seqs value
49
+ blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
50
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
51
+
52
+ fastas=[]
53
+
54
+ seqs.each do |seq|
55
+ fastas.push ">"+seq.seq_name
56
+ fastas.push seq.seq_fasta
57
+ end
58
+
59
+ # fastas=fastas.join("\n")
60
+
61
+ blast_table_results = blast.do_blast(fastas)
62
+
63
+ # puts blast_table_results.inspect
64
+
65
+ return blast_table_results
66
+ end
67
+
68
+
69
+ def exec_seq(seq,blast_query)
70
+ if blast_query.query_id != seq.seq_name
71
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
72
+ end
73
+
74
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
75
+
76
+
77
+ # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
78
+
79
+ # blast with only one sequence, not with many sequences from a database
80
+ #---------------------------------------------------------------------
81
+
82
+ # blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
83
+
84
+ #blast_table_results = BlastTableResult.new(res)
85
+
86
+ # blast_table_results.inspect
87
+
88
+ adapters=[]
89
+ # blast_table_results.querys.each do |query| # first round to save adapters without overlap
90
+ merge_hits(blast_query,adapters)
91
+ # end
92
+
93
+ begin
94
+ adapters2=adapters # second round to save adapters without overlap
95
+ adapters = []
96
+ merge_hits(adapters2,adapters)
97
+ end until (adapters2.count == adapters.count)
98
+
99
+ actions=[]
100
+ adapter_size=0
101
+ # @stats['adapter_size']={}
102
+ adapters.each do |ad| # adds the correspondent action to the sequence
103
+
104
+ type = get_type_adapter(ad.q_beg,ad.q_end,seq)
105
+ a = seq.new_action(ad.q_beg,ad.q_end,type)
106
+ # puts " state left_action #{a.left_action} right_action #{a.right_action}"
107
+
108
+
109
+ adapter_size=ad.q_end-ad.q_beg+1
110
+
111
+ if cut_by_right(ad,seq)
112
+
113
+ # puts "action right end1 #{seq.insert_end}"
114
+
115
+ a.right_action=true #mark rigth action to get the left insert
116
+ else
117
+
118
+ # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
119
+
120
+ a.left_action = true #mark left action to get the right insert
121
+
122
+ end
123
+
124
+ a.message = ad.subject_id
125
+ a.reversed = ad.reversed
126
+ actions.push a
127
+
128
+ # @stats[:adapter_size]={adapter_size => 1}
129
+ add_stats('adapter_size',adapter_size)
130
+
131
+ end
132
+ seq.add_actions(actions)
133
+ #
134
+ end
135
+
136
+ #Returns an array with the errors caused by missing parameters
137
+ def self.check_params(params)
138
+ errors=[]
139
+
140
+ comment='Blast E-value used as cut-off when searching for adapters or primers'
141
+ default_value = 1e-6
142
+ params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
143
+
144
+ comment='Minimum required identity (%) for a reliable adapter'
145
+ default_value = 95
146
+ params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
147
+
148
+ comment='Path for adapter database'
149
+ default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
150
+ params.check_param(errors,'adapters_db','DB',default_value,comment)
151
+
152
+ return errors
153
+ end
154
+
155
+
156
+ end