seqtrimnext 2.0.59 → 2.0.60
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -39
- data/bin/seqtrimnext +7 -7
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -1
- data/lib/seqtrimnext/classes/extract_stats.rb +1 -1
- data/lib/seqtrimnext/classes/seqtrim.rb +3 -3
- data/lib/seqtrimnext/plugins/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
- data/lib/seqtrimnext/plugins_old/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins_old/plugin_adapters_old.rb +156 -0
- data/lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb +382 -0
- data/lib/seqtrimnext/plugins_old/plugin_rem_adit_artifacts.rb +234 -0
- data/lib/seqtrimnext/version.rb +4 -0
- data/lib/seqtrimnext.rb +2 -16
- data/seqtrimnext.gemspec +38 -0
- metadata +95 -156
- data/.gemtest +0 -0
- data/History.txt +0 -130
- data/Manifest.txt +0 -125
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_helper.rb +0 -3
- data/test/test_seqtrimnext.rb +0 -11
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a3858c7d16c231f2bd9a8a20a79e71793fa4a26b
|
4
|
+
data.tar.gz: 8737fb4a873639ccf4784fa0b0f4add4778df20f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8885fc85c63703652371ecc1a4bc67670de700b8151ba0cf3e3e706904b979a4eeb777b289c2553b229832a556887572c0c8760978f7f6306b099a085a850ab1
|
7
|
+
data.tar.gz: a9c3c18e94a240720afd44840ba297259ec888c28381ec4855eec68b38dba3387b76df33c5211b2a91f9fec5d23e03645b792ae4fa71e8c82ea36dfc965e2e1e
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 dariogf
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
File without changes
|
data/Rakefile
CHANGED
@@ -1,39 +1,8 @@
|
|
1
|
-
require
|
2
|
-
|
3
|
-
require '
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# Hoe.plugin :cucumberfeatures
|
10
|
-
|
11
|
-
# Generate all the Rake tasks
|
12
|
-
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
13
|
-
$hoe = Hoe.spec 'seqtrimnext' do
|
14
|
-
self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
|
15
|
-
self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
|
16
|
-
self.rubyforge_name = self.name # TODO this is default value
|
17
|
-
# self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
|
18
|
-
|
19
|
-
self.extra_deps = []
|
20
|
-
self.extra_deps << ['narray','>=0']
|
21
|
-
self.extra_deps << ['gnuplot','>=0']
|
22
|
-
self.extra_deps << ['term-ansicolor','>=1.0.5']
|
23
|
-
self.extra_deps << ['xml-simple','>=1.0.12']
|
24
|
-
self.extra_deps << ['scbi_blast','>=0.0.34']
|
25
|
-
self.extra_deps << ['scbi_mapreduce','>=0.0.38']
|
26
|
-
self.extra_deps << ['scbi_fasta','>=0.1.7']
|
27
|
-
self.extra_deps << ['scbi_fastq','>=0.0.18']
|
28
|
-
self.extra_deps << ['scbi_plot','>=0.0.6']
|
29
|
-
self.extra_deps << ['scbi_math','>=0.0.1']
|
30
|
-
self.extra_deps << ['scbi_headers','>=0.0.2']
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
require 'newgem/tasks'
|
35
|
-
Dir['tasks/**/*.rake'].each { |t| load t }
|
36
|
-
|
37
|
-
# TODO - want other tests/tasks run by default? Add them to the list
|
38
|
-
# remove_task :default
|
39
|
-
# task :default => [:spec, :features, :redocs]
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new do |t|
|
6
|
+
t.libs << 'test'
|
7
|
+
t.pattern = "test/*_test.rb"
|
8
|
+
end
|
data/bin/seqtrimnext
CHANGED
@@ -119,7 +119,7 @@ require 'install_requirements'
|
|
119
119
|
|
120
120
|
ins = InstallRequirements.new
|
121
121
|
if (!ins.check_install_requirements)
|
122
|
-
exit
|
122
|
+
exit(-1)
|
123
123
|
end
|
124
124
|
|
125
125
|
require "logger"
|
@@ -378,7 +378,7 @@ end
|
|
378
378
|
|
379
379
|
if !File.exists?($FORMATTED_DB_PATH)
|
380
380
|
STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
|
381
|
-
exit
|
381
|
+
exit(-1)
|
382
382
|
end
|
383
383
|
|
384
384
|
|
@@ -395,7 +395,7 @@ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
|
|
395
395
|
puts "You must provide all required options"
|
396
396
|
puts ""
|
397
397
|
puts optparse.help
|
398
|
-
exit
|
398
|
+
exit(-1)
|
399
399
|
end
|
400
400
|
|
401
401
|
# check for template
|
@@ -405,7 +405,7 @@ if (!File.exists?(options[:template]))
|
|
405
405
|
else
|
406
406
|
$LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
|
407
407
|
puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
|
408
|
-
exit
|
408
|
+
exit(-1)
|
409
409
|
end
|
410
410
|
end
|
411
411
|
$LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
|
@@ -418,7 +418,7 @@ if options[:fastq]
|
|
418
418
|
# fastq file
|
419
419
|
if (!fastq_file.nil? && fastq_file!='-' && !File.exists?(File.expand_path(fastq_file)))
|
420
420
|
$LOG.error "Input file: #{fastq_file} doesn't exists"
|
421
|
-
exit
|
421
|
+
exit(-1)
|
422
422
|
end
|
423
423
|
end
|
424
424
|
end
|
@@ -426,13 +426,13 @@ end
|
|
426
426
|
# fasta file
|
427
427
|
if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
|
428
428
|
$LOG.error "Input file: #{options[:fasta]} doesn't exists"
|
429
|
-
exit
|
429
|
+
exit(-1)
|
430
430
|
end
|
431
431
|
|
432
432
|
# qual file
|
433
433
|
if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
|
434
434
|
$LOG.error "Input file: #{options[:qual]} doesn't exists"
|
435
|
-
exit
|
435
|
+
exit(-1)
|
436
436
|
end
|
437
437
|
|
438
438
|
s = Seqtrim.new(options)
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
# $: << File.expand_path(ROOT_PATH)
|
21
21
|
|
22
|
-
$: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
22
|
+
# $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
23
23
|
# $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
|
24
24
|
|
25
25
|
require 'seqtrimnext'
|
@@ -196,7 +196,7 @@ class ExtractStats
|
|
196
196
|
|
197
197
|
# sequence min size, is taken directly from params file
|
198
198
|
# max sequence limit is calculated here
|
199
|
-
if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences')=='true')
|
199
|
+
if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences').to_s=='true')
|
200
200
|
|
201
201
|
# high size limit is calculated with stats
|
202
202
|
@params.set_param('max_sequence_size_raw',(@sequence_lengths_stats.max+10).to_i)
|
@@ -262,7 +262,7 @@ class Seqtrim
|
|
262
262
|
end
|
263
263
|
|
264
264
|
# Extract global stats
|
265
|
-
if params.get_param('generate_initial_stats')=='true'
|
265
|
+
if params.get_param('generate_initial_stats').to_s=='true'
|
266
266
|
$LOG.info "Calculatings stats"
|
267
267
|
ExtractStats.new(sequence_readers,params)
|
268
268
|
else
|
@@ -273,7 +273,7 @@ class Seqtrim
|
|
273
273
|
# save used params to file
|
274
274
|
params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
|
275
275
|
|
276
|
-
piro_on = (params.get_param('next_generation_sequences')=='true')
|
276
|
+
piro_on = (params.get_param('next_generation_sequences').to_s=='true')
|
277
277
|
|
278
278
|
params.load_mids(params.get_param('mids_db'))
|
279
279
|
params.load_ab_adapters(params.get_param('adapters_ab_db'))
|
@@ -281,7 +281,7 @@ class Seqtrim
|
|
281
281
|
params.load_linkers(params.get_param('linkers_db'))
|
282
282
|
|
283
283
|
#execute cd-hit
|
284
|
-
if params.get_param('remove_clonality')=='true'
|
284
|
+
if params.get_param('remove_clonality').to_s=='true'
|
285
285
|
cmd=get_custom_cdhit(cd_hit_input_file,params)
|
286
286
|
if cmd.empty?
|
287
287
|
cmd=get_cd_hit_cmd(cd_hit_input_file,workers,$SEQTRIMNEXT_INIT)
|
Binary file
|
@@ -100,7 +100,7 @@ class PluginIndeterminations < Plugin
|
|
100
100
|
a.right_action=true
|
101
101
|
actions.push a
|
102
102
|
|
103
|
-
if @params.get_param('middle_indetermination_rejects')=='true'
|
103
|
+
if @params.get_param('middle_indetermination_rejects').to_s=='true'
|
104
104
|
seq.seq_rejected=true
|
105
105
|
seq.seq_rejected_by_message='Indeterminations in middle of sequence'
|
106
106
|
end
|
@@ -119,7 +119,7 @@ class PluginLowQuality < Plugin
|
|
119
119
|
|
120
120
|
if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
|
121
121
|
$LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
|
122
|
-
elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual')=='true'))
|
122
|
+
elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual').to_s=='true'))
|
123
123
|
|
124
124
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
|
125
125
|
|
@@ -101,7 +101,7 @@ class PluginVectors < Plugin
|
|
101
101
|
# puts " near #{near_to_extrem(v,seq,min_vector_size)} #{vector_size}>=#{min_vector_size}"
|
102
102
|
#c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1) #if ab adapter is very near to the end of original sequence
|
103
103
|
|
104
|
-
piro_on=@params.get_param('next_generation_sequences')
|
104
|
+
piro_on=@params.get_param('next_generation_sequences').to_s
|
105
105
|
|
106
106
|
if (((piro_on=='true') && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionLinker)) && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionMultipleLinker))) || # if vectors DB not is contained inside detected linkers
|
107
107
|
(piro_on=='false'))
|
@@ -110,7 +110,7 @@ class PluginVectors < Plugin
|
|
110
110
|
if !near_to_extrem(v,seq,min_vector_size)
|
111
111
|
type = 'ActionUnexpectedVector'
|
112
112
|
|
113
|
-
if @params.get_param('middle_vector_rejects')=='true'
|
113
|
+
if @params.get_param('middle_vector_rejects').to_s=='true'
|
114
114
|
seq.seq_rejected=true
|
115
115
|
seq.seq_rejected_by_message='unexpected vector'
|
116
116
|
end
|
Binary file
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginAdapters
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class PluginAdaptersOld < Plugin
|
11
|
+
|
12
|
+
def get_type_adapter(p_start,p_end,seq)
|
13
|
+
#if q_beg is nearer the left, add adapter action by the left,
|
14
|
+
#if q_end is nearer the right, add adapter action by the right
|
15
|
+
#NOTE: If the adapter is very near to both the left and the right,
|
16
|
+
#then the sequence isn't valid, because almost all of the sequence is adapter.
|
17
|
+
|
18
|
+
|
19
|
+
v1= p_end.to_i
|
20
|
+
v2= p_start.to_i
|
21
|
+
|
22
|
+
# puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
|
23
|
+
|
24
|
+
# puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
|
25
|
+
if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
|
26
|
+
type = "ActionLeftAdapter"
|
27
|
+
|
28
|
+
else
|
29
|
+
type = "ActionRightAdapter"
|
30
|
+
|
31
|
+
end
|
32
|
+
return type
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
def cut_by_right(adapter,seq)
|
37
|
+
|
38
|
+
left_size = adapter.q_beg-seq.insert_start+1
|
39
|
+
right_size = seq.insert_end-adapter.q_end+1
|
40
|
+
left_size=0 if (left_size<0)
|
41
|
+
right_size=0 if (right_size<0)
|
42
|
+
|
43
|
+
return (left_size>(right_size/2).to_i)
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def do_blasts(seqs)
|
48
|
+
# find MIDS with less results than max_target_seqs value
|
49
|
+
blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
50
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
51
|
+
|
52
|
+
fastas=[]
|
53
|
+
|
54
|
+
seqs.each do |seq|
|
55
|
+
fastas.push ">"+seq.seq_name
|
56
|
+
fastas.push seq.seq_fasta
|
57
|
+
end
|
58
|
+
|
59
|
+
# fastas=fastas.join("\n")
|
60
|
+
|
61
|
+
blast_table_results = blast.do_blast(fastas)
|
62
|
+
|
63
|
+
# puts blast_table_results.inspect
|
64
|
+
|
65
|
+
return blast_table_results
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
def exec_seq(seq,blast_query)
|
70
|
+
if blast_query.query_id != seq.seq_name
|
71
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
72
|
+
end
|
73
|
+
|
74
|
+
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
|
75
|
+
|
76
|
+
|
77
|
+
# blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
78
|
+
|
79
|
+
# blast with only one sequence, not with many sequences from a database
|
80
|
+
#---------------------------------------------------------------------
|
81
|
+
|
82
|
+
# blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
|
83
|
+
|
84
|
+
#blast_table_results = BlastTableResult.new(res)
|
85
|
+
|
86
|
+
# blast_table_results.inspect
|
87
|
+
|
88
|
+
adapters=[]
|
89
|
+
# blast_table_results.querys.each do |query| # first round to save adapters without overlap
|
90
|
+
merge_hits(blast_query,adapters)
|
91
|
+
# end
|
92
|
+
|
93
|
+
begin
|
94
|
+
adapters2=adapters # second round to save adapters without overlap
|
95
|
+
adapters = []
|
96
|
+
merge_hits(adapters2,adapters)
|
97
|
+
end until (adapters2.count == adapters.count)
|
98
|
+
|
99
|
+
actions=[]
|
100
|
+
adapter_size=0
|
101
|
+
# @stats['adapter_size']={}
|
102
|
+
adapters.each do |ad| # adds the correspondent action to the sequence
|
103
|
+
|
104
|
+
type = get_type_adapter(ad.q_beg,ad.q_end,seq)
|
105
|
+
a = seq.new_action(ad.q_beg,ad.q_end,type)
|
106
|
+
# puts " state left_action #{a.left_action} right_action #{a.right_action}"
|
107
|
+
|
108
|
+
|
109
|
+
adapter_size=ad.q_end-ad.q_beg+1
|
110
|
+
|
111
|
+
if cut_by_right(ad,seq)
|
112
|
+
|
113
|
+
# puts "action right end1 #{seq.insert_end}"
|
114
|
+
|
115
|
+
a.right_action=true #mark rigth action to get the left insert
|
116
|
+
else
|
117
|
+
|
118
|
+
# puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
|
119
|
+
|
120
|
+
a.left_action = true #mark left action to get the right insert
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
a.message = ad.subject_id
|
125
|
+
a.reversed = ad.reversed
|
126
|
+
actions.push a
|
127
|
+
|
128
|
+
# @stats[:adapter_size]={adapter_size => 1}
|
129
|
+
add_stats('adapter_size',adapter_size)
|
130
|
+
|
131
|
+
end
|
132
|
+
seq.add_actions(actions)
|
133
|
+
#
|
134
|
+
end
|
135
|
+
|
136
|
+
#Returns an array with the errors caused by missing parameters
|
137
|
+
def self.check_params(params)
|
138
|
+
errors=[]
|
139
|
+
|
140
|
+
comment='Blast E-value used as cut-off when searching for adapters or primers'
|
141
|
+
default_value = 1e-6
|
142
|
+
params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
|
143
|
+
|
144
|
+
comment='Minimum required identity (%) for a reliable adapter'
|
145
|
+
default_value = 95
|
146
|
+
params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
|
147
|
+
|
148
|
+
comment='Path for adapter database'
|
149
|
+
default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
|
150
|
+
params.check_param(errors,'adapters_db','DB',default_value,comment)
|
151
|
+
|
152
|
+
return errors
|
153
|
+
end
|
154
|
+
|
155
|
+
|
156
|
+
end
|