seqtrimnext 2.0.50 → 2.0.51
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +4 -5
- data/Rakefile +1 -1
- data/lib/seqtrimnext.rb +1 -1
- data/lib/seqtrimnext/classes/install_database.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -1
- data/lib/seqtrimnext/plugins/{plugin_classify.rb → plugin_user_contaminants.rb} +3 -3
- data/lib/seqtrimnext/templates/amplicons.txt +21 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +13 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +2 -0
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +7 -0
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +7 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +3 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +2 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +9 -0
- metadata +10 -12
- data/lib/seqtrimnext/templates/genomics_illumina.txt +0 -5
- data/lib/seqtrimnext/templates/low_quality.txt +0 -5
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +0 -5
- data/lib/seqtrimnext/templates/transcriptomics_illumina.txt +0 -7
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -78,7 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
|
|
78
78
|
lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
79
79
|
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
80
80
|
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
81
|
-
lib/seqtrimnext/plugins/plugin_classify.rb
|
81
|
+
lib/seqtrimnext/plugins/plugin_user_contaminants.rb
|
82
82
|
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
83
83
|
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
84
84
|
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -95,11 +95,10 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
|
|
95
95
|
lib/seqtrimnext/templates/amplicons.txt
|
96
96
|
lib/seqtrimnext/templates/genomics_454.txt
|
97
97
|
lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
98
|
-
lib/seqtrimnext/templates/genomics_illumina.txt
|
99
|
-
lib/seqtrimnext/templates/low_quality.txt
|
100
|
-
lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
98
|
+
lib/seqtrimnext/templates/genomics_short_reads.txt
|
99
|
+
lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
101
100
|
lib/seqtrimnext/templates/transcriptomics_454.txt
|
102
|
-
lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
101
|
+
lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
103
102
|
lib/seqtrimnext/templates/transcriptomics_plants.txt
|
104
103
|
lib/seqtrimnext/utils/extract_samples.rb
|
105
104
|
lib/seqtrimnext/utils/fasta2xml.rb
|
data/Rakefile
CHANGED
@@ -24,7 +24,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
|
|
24
24
|
self.extra_deps << ['scbi_blast','>=0.0.34']
|
25
25
|
self.extra_deps << ['scbi_mapreduce','>=0.0.38']
|
26
26
|
self.extra_deps << ['scbi_fasta','>=0.1.7']
|
27
|
-
self.extra_deps << ['scbi_fastq','>=0.0.
|
27
|
+
self.extra_deps << ['scbi_fastq','>=0.0.16']
|
28
28
|
self.extra_deps << ['scbi_plot','>=0.0.6']
|
29
29
|
self.extra_deps << ['scbi_math','>=0.0.1']
|
30
30
|
|
data/lib/seqtrimnext.rb
CHANGED
@@ -6,7 +6,7 @@ class InstallDatabase
|
|
6
6
|
def initialize(type,db_path)
|
7
7
|
|
8
8
|
|
9
|
-
types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome']
|
9
|
+
types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome','cont_viruses']
|
10
10
|
|
11
11
|
if types.include?(type)
|
12
12
|
|
@@ -8,7 +8,7 @@ require "make_blast_db"
|
|
8
8
|
# Inherit: Plugin
|
9
9
|
########################################################
|
10
10
|
|
11
|
-
class PluginClassify < Plugin
|
11
|
+
class PluginUserContaminants < Plugin
|
12
12
|
|
13
13
|
|
14
14
|
MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
|
@@ -47,7 +47,7 @@ class PluginClassify < Plugin
|
|
47
47
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
48
48
|
# y una secuencia de baja complejidad como entrada
|
49
49
|
|
50
|
-
blast = BatchBlast.new("-db #{@params.get_param('
|
50
|
+
blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
|
51
51
|
|
52
52
|
$LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
|
53
53
|
|
@@ -133,7 +133,7 @@ class PluginClassify < Plugin
|
|
133
133
|
|
134
134
|
comment='Path for classify database'
|
135
135
|
default_value = File.join($FORMATTED_DB_PATH,'classify.fasta')
|
136
|
-
params.check_param(errors,'
|
136
|
+
params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
|
137
137
|
|
138
138
|
|
139
139
|
return errors
|
@@ -2,6 +2,27 @@
|
|
2
2
|
# General parameters to extract Amplicons
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to get Amplicons<br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
|
+
# Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
|
12
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs</li>
|
13
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
14
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
15
|
+
# Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
|
16
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
17
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
18
|
+
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
19
|
+
# Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
|
20
|
+
# Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
|
21
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
22
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
23
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
24
|
+
# Help: </ul>
|
25
|
+
|
5
26
|
plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
|
6
27
|
|
7
28
|
# do not remove cloned sequences
|
@@ -2,4 +2,17 @@
|
|
2
2
|
# General parameters GENOMICS WITH POSSIBLE LINKER
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginLowHighSize</li>
|
10
|
+
# Help: <li>PluginMids</li>
|
11
|
+
# Help: <li>PluginIndeterminations</li>
|
12
|
+
# Help: <li>PluginAbAdapters</li>
|
13
|
+
# Help: <li>PluginContaminants</li>
|
14
|
+
# Help: <li>PluginAmplicons</li>
|
15
|
+
# Help: <li>PluginLowQuality</li>
|
16
|
+
# Help: </ul>
|
17
|
+
|
5
18
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -2,4 +2,6 @@
|
|
2
2
|
# General parameters GENOMICS WITH POSSIBLE LINKER
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
|
6
|
+
|
5
7
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# ==============================================================
|
2
|
+
# General parameters for genomics - Illumina/SOLiD (short reads)
|
3
|
+
# ==============================================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for genomics <br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# ==============================================================
|
2
|
+
# General parameters for genomics - Illumina/SOLiD (short reads)
|
3
|
+
# ==============================================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
@@ -1,7 +1,9 @@
|
|
1
1
|
# ======================================
|
2
|
-
# General parameters
|
2
|
+
# General parameters TRANSCRIPTOMICS
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
|
6
|
+
|
5
7
|
|
6
8
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
7
9
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# ======================================
|
2
|
-
# General parameters
|
2
|
+
# General parameters TRANSCRIPTOMICS PLANTS
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
|
5
6
|
|
6
7
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
7
8
|
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# ======================================
|
2
|
+
# General parameters for transcriptomics - Illumina/SOLiD (short reads)
|
3
|
+
# ======================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
8
|
+
|
9
|
+
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.50
|
5
|
+
version: 2.0.51
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-06-
|
13
|
+
date: 2012-06-20 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -97,7 +97,7 @@ dependencies:
|
|
97
97
|
requirements:
|
98
98
|
- - ">="
|
99
99
|
- !ruby/object:Gem::Version
|
100
|
-
version: 0.0.
|
100
|
+
version: 0.0.16
|
101
101
|
type: :runtime
|
102
102
|
version_requirements: *id008
|
103
103
|
- !ruby/object:Gem::Dependency
|
@@ -166,11 +166,10 @@ extra_rdoc_files:
|
|
166
166
|
- lib/seqtrimnext/templates/amplicons.txt
|
167
167
|
- lib/seqtrimnext/templates/genomics_454.txt
|
168
168
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
169
|
-
- lib/seqtrimnext/templates/genomics_illumina.txt
|
170
|
-
- lib/seqtrimnext/templates/low_quality.txt
|
171
|
-
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
169
|
+
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
170
|
+
- lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
172
171
|
- lib/seqtrimnext/templates/transcriptomics_454.txt
|
173
|
-
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
172
|
+
- lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
174
173
|
- lib/seqtrimnext/templates/transcriptomics_plants.txt
|
175
174
|
- Manifest.txt
|
176
175
|
- PostInstall.txt
|
@@ -255,7 +254,7 @@ files:
|
|
255
254
|
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
256
255
|
- lib/seqtrimnext/plugins/plugin_amplicons.rb
|
257
256
|
- lib/seqtrimnext/plugins/plugin_contaminants.rb
|
258
|
-
- lib/seqtrimnext/plugins/plugin_classify.rb
|
257
|
+
- lib/seqtrimnext/plugins/plugin_user_contaminants.rb
|
259
258
|
- lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
260
259
|
- lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
261
260
|
- lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -272,11 +271,10 @@ files:
|
|
272
271
|
- lib/seqtrimnext/templates/amplicons.txt
|
273
272
|
- lib/seqtrimnext/templates/genomics_454.txt
|
274
273
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
275
|
-
- lib/seqtrimnext/templates/genomics_illumina.txt
|
276
|
-
- lib/seqtrimnext/templates/low_quality.txt
|
277
|
-
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
274
|
+
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
275
|
+
- lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
278
276
|
- lib/seqtrimnext/templates/transcriptomics_454.txt
|
279
|
-
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
277
|
+
- lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
280
278
|
- lib/seqtrimnext/templates/transcriptomics_plants.txt
|
281
279
|
- lib/seqtrimnext/utils/extract_samples.rb
|
282
280
|
- lib/seqtrimnext/utils/fasta2xml.rb
|
@@ -1,7 +0,0 @@
|
|
1
|
-
# ======================================
|
2
|
-
# General parameters
|
3
|
-
# ======================================
|
4
|
-
|
5
|
-
plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
|
6
|
-
|
7
|
-
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|