seqtrimnext 2.0.59 → 2.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -39
- data/bin/seqtrimnext +7 -7
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -1
- data/lib/seqtrimnext/classes/extract_stats.rb +1 -1
- data/lib/seqtrimnext/classes/seqtrim.rb +3 -3
- data/lib/seqtrimnext/plugins/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
- data/lib/seqtrimnext/plugins_old/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins_old/plugin_adapters_old.rb +156 -0
- data/lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb +382 -0
- data/lib/seqtrimnext/plugins_old/plugin_rem_adit_artifacts.rb +234 -0
- data/lib/seqtrimnext/version.rb +4 -0
- data/lib/seqtrimnext.rb +2 -16
- data/seqtrimnext.gemspec +38 -0
- metadata +95 -156
- data/.gemtest +0 -0
- data/History.txt +0 -130
- data/Manifest.txt +0 -125
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_helper.rb +0 -3
- data/test/test_seqtrimnext.rb +0 -11
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a3858c7d16c231f2bd9a8a20a79e71793fa4a26b
|
4
|
+
data.tar.gz: 8737fb4a873639ccf4784fa0b0f4add4778df20f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8885fc85c63703652371ecc1a4bc67670de700b8151ba0cf3e3e706904b979a4eeb777b289c2553b229832a556887572c0c8760978f7f6306b099a085a850ab1
|
7
|
+
data.tar.gz: a9c3c18e94a240720afd44840ba297259ec888c28381ec4855eec68b38dba3387b76df33c5211b2a91f9fec5d23e03645b792ae4fa71e8c82ea36dfc965e2e1e
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 dariogf
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
File without changes
|
data/Rakefile
CHANGED
@@ -1,39 +1,8 @@
|
|
1
|
-
require
|
2
|
-
|
3
|
-
require '
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# Hoe.plugin :cucumberfeatures
|
10
|
-
|
11
|
-
# Generate all the Rake tasks
|
12
|
-
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
13
|
-
$hoe = Hoe.spec 'seqtrimnext' do
|
14
|
-
self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
|
15
|
-
self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
|
16
|
-
self.rubyforge_name = self.name # TODO this is default value
|
17
|
-
# self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
|
18
|
-
|
19
|
-
self.extra_deps = []
|
20
|
-
self.extra_deps << ['narray','>=0']
|
21
|
-
self.extra_deps << ['gnuplot','>=0']
|
22
|
-
self.extra_deps << ['term-ansicolor','>=1.0.5']
|
23
|
-
self.extra_deps << ['xml-simple','>=1.0.12']
|
24
|
-
self.extra_deps << ['scbi_blast','>=0.0.34']
|
25
|
-
self.extra_deps << ['scbi_mapreduce','>=0.0.38']
|
26
|
-
self.extra_deps << ['scbi_fasta','>=0.1.7']
|
27
|
-
self.extra_deps << ['scbi_fastq','>=0.0.18']
|
28
|
-
self.extra_deps << ['scbi_plot','>=0.0.6']
|
29
|
-
self.extra_deps << ['scbi_math','>=0.0.1']
|
30
|
-
self.extra_deps << ['scbi_headers','>=0.0.2']
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
require 'newgem/tasks'
|
35
|
-
Dir['tasks/**/*.rake'].each { |t| load t }
|
36
|
-
|
37
|
-
# TODO - want other tests/tasks run by default? Add them to the list
|
38
|
-
# remove_task :default
|
39
|
-
# task :default => [:spec, :features, :redocs]
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new do |t|
|
6
|
+
t.libs << 'test'
|
7
|
+
t.pattern = "test/*_test.rb"
|
8
|
+
end
|
data/bin/seqtrimnext
CHANGED
@@ -119,7 +119,7 @@ require 'install_requirements'
|
|
119
119
|
|
120
120
|
ins = InstallRequirements.new
|
121
121
|
if (!ins.check_install_requirements)
|
122
|
-
exit
|
122
|
+
exit(-1)
|
123
123
|
end
|
124
124
|
|
125
125
|
require "logger"
|
@@ -378,7 +378,7 @@ end
|
|
378
378
|
|
379
379
|
if !File.exists?($FORMATTED_DB_PATH)
|
380
380
|
STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
|
381
|
-
exit
|
381
|
+
exit(-1)
|
382
382
|
end
|
383
383
|
|
384
384
|
|
@@ -395,7 +395,7 @@ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
|
|
395
395
|
puts "You must provide all required options"
|
396
396
|
puts ""
|
397
397
|
puts optparse.help
|
398
|
-
exit
|
398
|
+
exit(-1)
|
399
399
|
end
|
400
400
|
|
401
401
|
# check for template
|
@@ -405,7 +405,7 @@ if (!File.exists?(options[:template]))
|
|
405
405
|
else
|
406
406
|
$LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
|
407
407
|
puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
|
408
|
-
exit
|
408
|
+
exit(-1)
|
409
409
|
end
|
410
410
|
end
|
411
411
|
$LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
|
@@ -418,7 +418,7 @@ if options[:fastq]
|
|
418
418
|
# fastq file
|
419
419
|
if (!fastq_file.nil? && fastq_file!='-' && !File.exists?(File.expand_path(fastq_file)))
|
420
420
|
$LOG.error "Input file: #{fastq_file} doesn't exists"
|
421
|
-
exit
|
421
|
+
exit(-1)
|
422
422
|
end
|
423
423
|
end
|
424
424
|
end
|
@@ -426,13 +426,13 @@ end
|
|
426
426
|
# fasta file
|
427
427
|
if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
|
428
428
|
$LOG.error "Input file: #{options[:fasta]} doesn't exists"
|
429
|
-
exit
|
429
|
+
exit(-1)
|
430
430
|
end
|
431
431
|
|
432
432
|
# qual file
|
433
433
|
if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
|
434
434
|
$LOG.error "Input file: #{options[:qual]} doesn't exists"
|
435
|
-
exit
|
435
|
+
exit(-1)
|
436
436
|
end
|
437
437
|
|
438
438
|
s = Seqtrim.new(options)
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
# $: << File.expand_path(ROOT_PATH)
|
21
21
|
|
22
|
-
$: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
22
|
+
# $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
23
23
|
# $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
|
24
24
|
|
25
25
|
require 'seqtrimnext'
|
@@ -196,7 +196,7 @@ class ExtractStats
|
|
196
196
|
|
197
197
|
# sequence min size, is taken directly from params file
|
198
198
|
# max sequence limit is calculated here
|
199
|
-
if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences')=='true')
|
199
|
+
if (@sequence_lengths_stats.variance_coefficient<=10) or (@params.get_param('accept_very_long_sequences').to_s=='true')
|
200
200
|
|
201
201
|
# high size limit is calculated with stats
|
202
202
|
@params.set_param('max_sequence_size_raw',(@sequence_lengths_stats.max+10).to_i)
|
@@ -262,7 +262,7 @@ class Seqtrim
|
|
262
262
|
end
|
263
263
|
|
264
264
|
# Extract global stats
|
265
|
-
if params.get_param('generate_initial_stats')=='true'
|
265
|
+
if params.get_param('generate_initial_stats').to_s=='true'
|
266
266
|
$LOG.info "Calculatings stats"
|
267
267
|
ExtractStats.new(sequence_readers,params)
|
268
268
|
else
|
@@ -273,7 +273,7 @@ class Seqtrim
|
|
273
273
|
# save used params to file
|
274
274
|
params.save_file(File.join(OUTPUT_PATH,'used_params.txt'))
|
275
275
|
|
276
|
-
piro_on = (params.get_param('next_generation_sequences')=='true')
|
276
|
+
piro_on = (params.get_param('next_generation_sequences').to_s=='true')
|
277
277
|
|
278
278
|
params.load_mids(params.get_param('mids_db'))
|
279
279
|
params.load_ab_adapters(params.get_param('adapters_ab_db'))
|
@@ -281,7 +281,7 @@ class Seqtrim
|
|
281
281
|
params.load_linkers(params.get_param('linkers_db'))
|
282
282
|
|
283
283
|
#execute cd-hit
|
284
|
-
if params.get_param('remove_clonality')=='true'
|
284
|
+
if params.get_param('remove_clonality').to_s=='true'
|
285
285
|
cmd=get_custom_cdhit(cd_hit_input_file,params)
|
286
286
|
if cmd.empty?
|
287
287
|
cmd=get_cd_hit_cmd(cd_hit_input_file,workers,$SEQTRIMNEXT_INIT)
|
Binary file
|
@@ -100,7 +100,7 @@ class PluginIndeterminations < Plugin
|
|
100
100
|
a.right_action=true
|
101
101
|
actions.push a
|
102
102
|
|
103
|
-
if @params.get_param('middle_indetermination_rejects')=='true'
|
103
|
+
if @params.get_param('middle_indetermination_rejects').to_s=='true'
|
104
104
|
seq.seq_rejected=true
|
105
105
|
seq.seq_rejected_by_message='Indeterminations in middle of sequence'
|
106
106
|
end
|
@@ -119,7 +119,7 @@ class PluginLowQuality < Plugin
|
|
119
119
|
|
120
120
|
if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
|
121
121
|
$LOG.debug " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
|
122
|
-
elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual')=='true'))
|
122
|
+
elsif ((seq.seq_qual.size>0) && (@params.get_param('use_qual').to_s=='true'))
|
123
123
|
|
124
124
|
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
|
125
125
|
|
@@ -101,7 +101,7 @@ class PluginVectors < Plugin
|
|
101
101
|
# puts " near #{near_to_extrem(v,seq,min_vector_size)} #{vector_size}>=#{min_vector_size}"
|
102
102
|
#c.q_end+seq.insert_start+max_to_end)>=seq.seq_fasta_orig.size-1) #if ab adapter is very near to the end of original sequence
|
103
103
|
|
104
|
-
piro_on=@params.get_param('next_generation_sequences')
|
104
|
+
piro_on=@params.get_param('next_generation_sequences').to_s
|
105
105
|
|
106
106
|
if (((piro_on=='true') && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionLinker)) && (!seq.range_inside_action_type?(v.q_beg,v.q_end,ActionMultipleLinker))) || # if vectors DB not is contained inside detected linkers
|
107
107
|
(piro_on=='false'))
|
@@ -110,7 +110,7 @@ class PluginVectors < Plugin
|
|
110
110
|
if !near_to_extrem(v,seq,min_vector_size)
|
111
111
|
type = 'ActionUnexpectedVector'
|
112
112
|
|
113
|
-
if @params.get_param('middle_vector_rejects')=='true'
|
113
|
+
if @params.get_param('middle_vector_rejects').to_s=='true'
|
114
114
|
seq.seq_rejected=true
|
115
115
|
seq.seq_rejected_by_message='unexpected vector'
|
116
116
|
end
|
Binary file
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginAdapters
|
7
|
+
# Inherit: Plugin
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
class PluginAdaptersOld < Plugin
|
11
|
+
|
12
|
+
def get_type_adapter(p_start,p_end,seq)
|
13
|
+
#if q_beg is nearer the left, add adapter action by the left,
|
14
|
+
#if q_end is nearer the right, add adapter action by the right
|
15
|
+
#NOTE: If the adapter is very near to both the left and the right,
|
16
|
+
#then the sequence isn't valid, because almost all of the sequence is adapter.
|
17
|
+
|
18
|
+
|
19
|
+
v1= p_end.to_i
|
20
|
+
v2= p_start.to_i
|
21
|
+
|
22
|
+
# puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
|
23
|
+
|
24
|
+
# puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
|
25
|
+
if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
|
26
|
+
type = "ActionLeftAdapter"
|
27
|
+
|
28
|
+
else
|
29
|
+
type = "ActionRightAdapter"
|
30
|
+
|
31
|
+
end
|
32
|
+
return type
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
def cut_by_right(adapter,seq)
|
37
|
+
|
38
|
+
left_size = adapter.q_beg-seq.insert_start+1
|
39
|
+
right_size = seq.insert_end-adapter.q_end+1
|
40
|
+
left_size=0 if (left_size<0)
|
41
|
+
right_size=0 if (right_size<0)
|
42
|
+
|
43
|
+
return (left_size>(right_size/2).to_i)
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def do_blasts(seqs)
|
48
|
+
# find MIDS with less results than max_target_seqs value
|
49
|
+
blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
50
|
+
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
51
|
+
|
52
|
+
fastas=[]
|
53
|
+
|
54
|
+
seqs.each do |seq|
|
55
|
+
fastas.push ">"+seq.seq_name
|
56
|
+
fastas.push seq.seq_fasta
|
57
|
+
end
|
58
|
+
|
59
|
+
# fastas=fastas.join("\n")
|
60
|
+
|
61
|
+
blast_table_results = blast.do_blast(fastas)
|
62
|
+
|
63
|
+
# puts blast_table_results.inspect
|
64
|
+
|
65
|
+
return blast_table_results
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
def exec_seq(seq,blast_query)
|
70
|
+
if blast_query.query_id != seq.seq_name
|
71
|
+
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
72
|
+
end
|
73
|
+
|
74
|
+
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
|
75
|
+
|
76
|
+
|
77
|
+
# blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
78
|
+
|
79
|
+
# blast with only one sequence, not with many sequences from a database
|
80
|
+
#---------------------------------------------------------------------
|
81
|
+
|
82
|
+
# blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
|
83
|
+
|
84
|
+
#blast_table_results = BlastTableResult.new(res)
|
85
|
+
|
86
|
+
# blast_table_results.inspect
|
87
|
+
|
88
|
+
adapters=[]
|
89
|
+
# blast_table_results.querys.each do |query| # first round to save adapters without overlap
|
90
|
+
merge_hits(blast_query,adapters)
|
91
|
+
# end
|
92
|
+
|
93
|
+
begin
|
94
|
+
adapters2=adapters # second round to save adapters without overlap
|
95
|
+
adapters = []
|
96
|
+
merge_hits(adapters2,adapters)
|
97
|
+
end until (adapters2.count == adapters.count)
|
98
|
+
|
99
|
+
actions=[]
|
100
|
+
adapter_size=0
|
101
|
+
# @stats['adapter_size']={}
|
102
|
+
adapters.each do |ad| # adds the correspondent action to the sequence
|
103
|
+
|
104
|
+
type = get_type_adapter(ad.q_beg,ad.q_end,seq)
|
105
|
+
a = seq.new_action(ad.q_beg,ad.q_end,type)
|
106
|
+
# puts " state left_action #{a.left_action} right_action #{a.right_action}"
|
107
|
+
|
108
|
+
|
109
|
+
adapter_size=ad.q_end-ad.q_beg+1
|
110
|
+
|
111
|
+
if cut_by_right(ad,seq)
|
112
|
+
|
113
|
+
# puts "action right end1 #{seq.insert_end}"
|
114
|
+
|
115
|
+
a.right_action=true #mark rigth action to get the left insert
|
116
|
+
else
|
117
|
+
|
118
|
+
# puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
|
119
|
+
|
120
|
+
a.left_action = true #mark left action to get the right insert
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
a.message = ad.subject_id
|
125
|
+
a.reversed = ad.reversed
|
126
|
+
actions.push a
|
127
|
+
|
128
|
+
# @stats[:adapter_size]={adapter_size => 1}
|
129
|
+
add_stats('adapter_size',adapter_size)
|
130
|
+
|
131
|
+
end
|
132
|
+
seq.add_actions(actions)
|
133
|
+
#
|
134
|
+
end
|
135
|
+
|
136
|
+
#Returns an array with the errors caused by missing parameters
|
137
|
+
def self.check_params(params)
|
138
|
+
errors=[]
|
139
|
+
|
140
|
+
comment='Blast E-value used as cut-off when searching for adapters or primers'
|
141
|
+
default_value = 1e-6
|
142
|
+
params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
|
143
|
+
|
144
|
+
comment='Minimum required identity (%) for a reliable adapter'
|
145
|
+
default_value = 95
|
146
|
+
params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
|
147
|
+
|
148
|
+
comment='Path for adapter database'
|
149
|
+
default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
|
150
|
+
params.check_param(errors,'adapters_db','DB',default_value,comment)
|
151
|
+
|
152
|
+
return errors
|
153
|
+
end
|
154
|
+
|
155
|
+
|
156
|
+
end
|