seqtrimnext 2.0.59 → 2.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a3858c7d16c231f2bd9a8a20a79e71793fa4a26b
4
+ data.tar.gz: 8737fb4a873639ccf4784fa0b0f4add4778df20f
5
+ SHA512:
6
+ metadata.gz: 8885fc85c63703652371ecc1a4bc67670de700b8151ba0cf3e3e706904b979a4eeb777b289c2553b229832a556887572c0c8760978f7f6306b099a085a850ab1
7
+ data.tar.gz: a9c3c18e94a240720afd44840ba297259ec888c28381ec4855eec68b38dba3387b76df33c5211b2a91f9fec5d23e03645b792ae4fa71e8c82ea36dfc965e2e1e
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in seqtrimnext.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 dariogf
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
File without changes
data/Rakefile CHANGED
@@ -1,39 +1,8 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.1.0'
3
- require 'hoe'
4
- require 'fileutils'
5
- require './lib/seqtrimnext'
6
-
7
- Hoe.plugin :newgem
8
- # Hoe.plugin :website
9
- # Hoe.plugin :cucumberfeatures
10
-
11
- # Generate all the Rake tasks
12
- # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
- $hoe = Hoe.spec 'seqtrimnext' do
14
- self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
15
- self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
- self.rubyforge_name = self.name # TODO this is default value
17
- # self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
18
-
19
- self.extra_deps = []
20
- self.extra_deps << ['narray','>=0']
21
- self.extra_deps << ['gnuplot','>=0']
22
- self.extra_deps << ['term-ansicolor','>=1.0.5']
23
- self.extra_deps << ['xml-simple','>=1.0.12']
24
- self.extra_deps << ['scbi_blast','>=0.0.34']
25
- self.extra_deps << ['scbi_mapreduce','>=0.0.38']
26
- self.extra_deps << ['scbi_fasta','>=0.1.7']
27
- self.extra_deps << ['scbi_fastq','>=0.0.18']
28
- self.extra_deps << ['scbi_plot','>=0.0.6']
29
- self.extra_deps << ['scbi_math','>=0.0.1']
30
- self.extra_deps << ['scbi_headers','>=0.0.2']
31
-
32
- end
33
-
34
- require 'newgem/tasks'
35
- Dir['tasks/**/*.rake'].each { |t| load t }
36
-
37
- # TODO - want other tests/tasks run by default? Add them to the list
38
- # remove_task :default
39
- # task :default => [:spec, :features, :redocs]
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Configure the Rake test task: select every file matching test/*_test.rb
# and add the 'test' directory to the task's libs list.
Rake::TestTask.new do |task|
  task.pattern = "test/*_test.rb"
  task.libs.push('test')
end
data/bin/seqtrimnext CHANGED
@@ -119,7 +119,7 @@ require 'install_requirements'
119
119
 
120
120
  ins = InstallRequirements.new
121
121
  if (!ins.check_install_requirements)
122
- exit
122
+ exit(-1)
123
123
  end
124
124
 
125
125
  require "logger"
@@ -378,7 +378,7 @@ end
378
378
 
379
379
  if !File.exists?($FORMATTED_DB_PATH)
380
380
  STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
381
- exit
381
+ exit(-1)
382
382
  end
383
383
 
384
384
 
@@ -395,7 +395,7 @@ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
395
395
  puts "You must provide all required options"
396
396
  puts ""
397
397
  puts optparse.help
398
- exit
398
+ exit(-1)
399
399
  end
400
400
 
401
401
  # check for template
@@ -405,7 +405,7 @@ if (!File.exists?(options[:template]))
405
405
  else
406
406
  $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
407
407
  puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
408
- exit
408
+ exit(-1)
409
409
  end
410
410
  end
411
411
  $LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
@@ -418,7 +418,7 @@ if options[:fastq]
418
418
  # fastq file
419
419
  if (!fastq_file.nil? && fastq_file!='-' && !File.exists?(File.expand_path(fastq_file)))
420
420
  $LOG.error "Input file: #{fastq_file} doesn't exists"
421
- exit
421
+ exit(-1)
422
422
  end
423
423
  end
424
424
  end
@@ -426,13 +426,13 @@ end
426
426
  # fasta file
427
427
  if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
428
428
  $LOG.error "Input file: #{options[:fasta]} doesn't exists"
429
- exit
429
+ exit(-1)
430
430
  end
431
431
 
432
432
  # qual file
433
433
  if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
434
434
  $LOG.error "Input file: #{options[:qual]} doesn't exists"
435
- exit
435
+ exit(-1)
436
436
  end
437
437
 
438
438
  s = Seqtrim.new(options)
@@ -19,7 +19,7 @@
19
19
  #
20
20
  # $: << File.expand_path(ROOT_PATH)
21
21
 
22
- $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
22
+ # $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
23
23
  # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
24
24
 
25
25
  require 'seqtrimnext'
@@ -196,7 +196,7 @@ class ExtractStats
196
196
 
197
197
  # sequence min size, is taken directly from params file
198
198
  # max sequence limit is calculated here
199
- if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences')=='true')
199
+ if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences').to_s=='true')
200
200
 
201
201
  # high size limit is calculated with stats
202
202
  @params.set_param('max_sequence_size_raw',(@sequence_lengths_stats.max+10).to_i)
@@ -262,7 +262,7 @@ class Seqtrim
262
262
  end
263
263
 
264
264
  # Extract global stats
265
- if params.get_param('generate_initial_stats')=='true'
265
+ if params.get_param('generate_initial_stats').to_s=='true'
266
266
  $LOG.info "Calculatings stats"
267
267
  ExtractStats.new(sequence_readers,params)
268
268
  else
@@ -273,7 +273,7 @@ class Seqtrim
273
273
  # save used params to file
274
274
  params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
275
275
 
276
- piro_on = (params.get_param('next_generation_sequences')=='true')
276
+ piro_on = (params.get_param('next_generation_sequences').to_s=='true')
277
277
 
278
278
  params.load_mids(params.get_param('mids_db'))
279
279
  params.load_ab_adapters(params.get_param('adapters_ab_db'))
@@ -281,7 +281,7 @@ class Seqtrim
281
281
  params.load_linkers(params.get_param('linkers_db'))
282
282
 
283
283
  #execute cd-hit
284
- if params.get_param('remove_clonality')=='true'
284
+ if params.get_param('remove_clonality').to_s=='true'
285
285
  cmd=get_custom_cdhit(cd_hit_input_file,params)
286
286
  if cmd.empty?
287
287
  cmd=get_cd_hit_cmd(cd_hit_input_file,workers,$SEQTRIMNEXT_INIT)
Binary file
@@ -154,7 +154,7 @@ class PluginContaminants < Plugin
154
154
 
155
155
 
156
156
 
157
- if ((not actions.empty? ) && (reject=='true'))
157
+ if ((not actions.empty? ) && (reject.to_s=='true'))
158
158
  #reject sequence
159
159
  # puts "SEQ_REJECTED= TRUE >>> "
160
160
  seq.seq_rejected=true
@@ -100,7 +100,7 @@ class PluginIndeterminations < Plugin
100
100
  a.right_action=true
101
101
  actions.push a
102
102
 
103
- if @params.get_param('middle_indetermination_rejects')=='true'
103
+ if @params.get_param('middle_indetermination_rejects').to_s=='true'
104
104
  seq.seq_rejected=true
105
105
  seq.seq_rejected_by_message='Indeterminations in middle of sequence'
106
106
  end
@@ -119,7 +119,7 @@ class PluginLowQuality < Plugin
119
119
 
120
120
  if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
121
121
  $LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
122
- elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual')=='true'))
122
+ elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual').to_s=='true'))
123
123
 
124
124
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
125
125
 
@@ -101,7 +101,7 @@ class PluginVectors < Plugin
101
101
  # puts " near #{near_to_extrem(v,seq,min_vector_size)} #{vector_size}>=#{min_vector_size}"
102
102
  #c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1) #if ab adapter is very near to the end of original sequence
103
103
 
104
- piro_on=@params.get_param('next_generation_sequences')
104
+ piro_on=@params.get_param('next_generation_sequences').to_s
105
105
 
106
106
  if (((piro_on=='true') && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionLinker)) && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionMultipleLinker))) || # if vectors DB not is contained inside detected linkers
107
107
  (piro_on=='false'))
@@ -110,7 +110,7 @@ class PluginVectors < Plugin
110
110
  if !near_to_extrem(v,seq,min_vector_size)
111
111
  type = 'ActionUnexpectedVector'
112
112
 
113
- if @params.get_param('middle_vector_rejects')=='true'
113
+ if @params.get_param('middle_vector_rejects').to_s=='true'
114
114
  seq.seq_rejected=true
115
115
  seq.seq_rejected_by_message='unexpected vector'
116
116
  end
@@ -0,0 +1,156 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAdapters
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAdaptersOld < Plugin
11
+
12
# Chooses the action type name for an adapter hit.
#
# p_start, p_end:: hit boundaries; both are coerced with +to_i+.
# seq::            object answering +insert_start+, +insert_end+,
#                  +seq_fasta+ and +seq_fasta_orig+.
#
# Two quantities are compared:
#   left  = p_start + seq.insert_start
#   right = (seq.seq_fasta.length - p_end - 1) +
#           (seq.seq_fasta_orig.length - seq.insert_end - 1)
# NOTE(review): these look like the distances from the hit to the left and
# right ends of the original read -- confirm against the callers.
#
# Returns "ActionLeftAdapter" when left is strictly smaller than right,
# otherwise "ActionRightAdapter" (a tie therefore counts as right).
def get_type_adapter(p_start,p_end,seq)
  hit_last = p_end.to_i
  hit_first = p_start.to_i

  left_gap = hit_first + seq.insert_start
  right_gap = seq.seq_fasta.length - hit_last - 1 + seq.seq_fasta_orig.length - seq.insert_end - 1

  left_gap < right_gap ? "ActionLeftAdapter" : "ActionRightAdapter"
end
34
+
35
+
36
# Tells whether the adapter should be handled as a right-side cut.
#
# adapter:: object answering +q_beg+ and +q_end+.
# seq::     object answering +insert_start+ and +insert_end+.
#
# The span before the adapter (q_beg - insert_start + 1) and the span after
# it (insert_end - q_end + 1) are each clamped to a minimum of 0.
#
# Returns true when the span before the adapter is strictly greater than
# half (integer division) of the span after it, false otherwise.
def cut_by_right(adapter,seq)
  span_before = [adapter.q_beg - seq.insert_start + 1, 0].max
  span_after = [seq.insert_end - adapter.q_end + 1, 0].max

  span_before > (span_after / 2).to_i
end
46
+
47
# Runs one batched blastn-short search for all the given sequences.
#
# seqs:: collection (responding to +each+) of objects answering +seq_name+
#        and +seq_fasta+.
#
# The database, e-value and identity threshold are read from @params
# ('adapters_db', 'blast_evalue_adapters', 'blast_percent_adapters').
# The resulting command is logged at debug level through $LOG.
#
# Returns whatever BatchBlast#do_blast returns for the FASTA lines
# (">name" followed by the sequence, for every element of +seqs+).
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('adapters_db')}"
  search_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}"

  searcher = BatchBlast.new(db_option, 'blastn', search_options)
  $LOG.debug('BLAST:' + searcher.get_blast_cmd)

  # Two entries per sequence: the header line, then the residues.
  fasta_lines = []
  seqs.each { |read| fasta_lines.push(">" + read.seq_name, read.seq_fasta) }

  searcher.do_blast(fasta_lines)
end
67
+
68
+
69
# Turns the adapter hits of one blast query into actions on +seq+.
#
# seq::         sequence object; must answer +seq_name+, +new_action+ and
#               +add_actions+ (plus whatever get_type_adapter and
#               cut_by_right read from it).
# blast_query:: blast result for this sequence; its +query_id+ must equal
#               seq.seq_name.
#
# Steps:
# 1. merge_hits(blast_query, adapters) collects the initial hit list.
# 2. merge_hits is re-applied to its own output until a round leaves the
#    number of hits unchanged (it always runs at least once more).
#    NOTE(review): merge_hits is defined outside this block; it presumably
#    fuses overlapping hits -- confirm in Plugin.
# 3. For every remaining hit an action is created with the type chosen by
#    get_type_adapter, flagged as right_action or left_action according to
#    cut_by_right, given the hit's subject_id as message and its reversed
#    flag, and the hit length is recorded with add_stats('adapter_size', ...).
#
# Returns the result of seq.add_actions.
# Raises RuntimeError when the blast query and the sequence names differ.
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"

  # First round: hits straight from the blast query.
  adapters = []
  merge_hits(blast_query, adapters)

  # Further rounds: merge the merged list again until its size is stable.
  stable = false
  until stable
    previous_round = adapters
    adapters = []
    merge_hits(previous_round, adapters)
    stable = (previous_round.count == adapters.count)
  end

  actions = adapters.map do |ad|
    action = seq.new_action(ad.q_beg, ad.q_end, get_type_adapter(ad.q_beg, ad.q_end, seq))

    if cut_by_right(ad, seq)
      action.right_action = true # keep the insert on the left of the adapter
    else
      action.left_action = true  # keep the insert on the right of the adapter
    end

    action.message = ad.subject_id
    action.reversed = ad.reversed

    add_stats('adapter_size', ad.q_end - ad.q_beg + 1)
    action
  end

  seq.add_actions(actions)
end
135
+
136
# Validates the parameters this plugin reads.
#
# params:: object answering
#          check_param(errors, name, type, default_value, comment).
#
# Each parameter is checked in order: 'blast_evalue_adapters',
# 'blast_percent_adapters', 'adapters_db'. Defaults are wrapped in lambdas
# so each one is evaluated right before its own check; in particular the
# database path (built from $FORMATTED_DB_PATH) is only computed after the
# first two checks have run.
#
# Returns the errors array that was passed to every check_param call.
def self.check_params(params)
  errors = []

  checks = [
    ['blast_evalue_adapters', 'Float', lambda { 1e-6 },
     'Blast E-value used as cut-off when searching for adapters or primers'],
    ['blast_percent_adapters', 'Integer', lambda { 95 },
     'Minimum required identity (%) for a reliable adapter'],
    ['adapters_db', 'DB', lambda { File.join($FORMATTED_DB_PATH,'adapters.fasta') },
     'Path for adapter database']
  ]

  checks.each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value.call, comment)
  end

  errors
end
154
+
155
+
156
+ end