seqtrimnext 2.0.50 → 2.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +4 -5
- data/Rakefile +1 -1
- data/lib/seqtrimnext.rb +1 -1
- data/lib/seqtrimnext/classes/install_database.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -1
- data/lib/seqtrimnext/plugins/{plugin_classify.rb → plugin_user_contaminants.rb} +3 -3
- data/lib/seqtrimnext/templates/amplicons.txt +21 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +13 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +2 -0
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +7 -0
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +7 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +3 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +2 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +9 -0
- metadata +10 -12
- data/lib/seqtrimnext/templates/genomics_illumina.txt +0 -5
- data/lib/seqtrimnext/templates/low_quality.txt +0 -5
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +0 -5
- data/lib/seqtrimnext/templates/transcriptomics_illumina.txt +0 -7
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -78,7 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
|
|
78
78
|
lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
79
79
|
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
80
80
|
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
81
|
-
lib/seqtrimnext/plugins/plugin_classify.rb
|
81
|
+
lib/seqtrimnext/plugins/plugin_user_contaminants.rb
|
82
82
|
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
83
83
|
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
84
84
|
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -95,11 +95,10 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
|
|
95
95
|
lib/seqtrimnext/templates/amplicons.txt
|
96
96
|
lib/seqtrimnext/templates/genomics_454.txt
|
97
97
|
lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
98
|
-
lib/seqtrimnext/templates/genomics_illumina.txt
|
99
|
-
lib/seqtrimnext/templates/low_quality.txt
|
100
|
-
lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
98
|
+
lib/seqtrimnext/templates/genomics_short_reads.txt
|
99
|
+
lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
101
100
|
lib/seqtrimnext/templates/transcriptomics_454.txt
|
102
|
-
lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
101
|
+
lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
103
102
|
lib/seqtrimnext/templates/transcriptomics_plants.txt
|
104
103
|
lib/seqtrimnext/utils/extract_samples.rb
|
105
104
|
lib/seqtrimnext/utils/fasta2xml.rb
|
data/Rakefile
CHANGED
@@ -24,7 +24,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
|
|
24
24
|
self.extra_deps << ['scbi_blast','>=0.0.34']
|
25
25
|
self.extra_deps << ['scbi_mapreduce','>=0.0.38']
|
26
26
|
self.extra_deps << ['scbi_fasta','>=0.1.7']
|
27
|
-
self.extra_deps << ['scbi_fastq','>=0.0.
|
27
|
+
self.extra_deps << ['scbi_fastq','>=0.0.16']
|
28
28
|
self.extra_deps << ['scbi_plot','>=0.0.6']
|
29
29
|
self.extra_deps << ['scbi_math','>=0.0.1']
|
30
30
|
|
data/lib/seqtrimnext.rb
CHANGED
@@ -6,7 +6,7 @@ class InstallDatabase
|
|
6
6
|
def initialize(type,db_path)
|
7
7
|
|
8
8
|
|
9
|
-
types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome']
|
9
|
+
types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome','cont_viruses']
|
10
10
|
|
11
11
|
if types.include?(type)
|
12
12
|
|
@@ -8,7 +8,7 @@ require "make_blast_db"
|
|
8
8
|
# Inherit: Plugin
|
9
9
|
########################################################
|
10
10
|
|
11
|
-
class PluginClassify < Plugin
|
11
|
+
class PluginUserContaminants < Plugin
|
12
12
|
|
13
13
|
|
14
14
|
MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
|
@@ -47,7 +47,7 @@ class PluginClassify < Plugin
|
|
47
47
|
# TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
|
48
48
|
# y una secuencia de baja complejidad como entrada
|
49
49
|
|
50
|
-
blast = BatchBlast.new("-db #{@params.get_param('
|
50
|
+
blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
|
51
51
|
|
52
52
|
$LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
|
53
53
|
|
@@ -133,7 +133,7 @@ class PluginClassify < Plugin
|
|
133
133
|
|
134
134
|
comment='Path for classify database'
|
135
135
|
default_value = File.join($FORMATTED_DB_PATH,'classify.fasta')
|
136
|
-
params.check_param(errors,'
|
136
|
+
params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
|
137
137
|
|
138
138
|
|
139
139
|
return errors
|
@@ -2,6 +2,27 @@
|
|
2
2
|
# General parameters to extract Amplicons
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to get Amplicons<br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
|
+
# Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
|
12
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs</li>
|
13
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
14
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
15
|
+
# Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
|
16
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
17
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
18
|
+
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
19
|
+
# Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
|
20
|
+
# Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
|
21
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
22
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
23
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
24
|
+
# Help: </ul>
|
25
|
+
|
5
26
|
plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
|
6
27
|
|
7
28
|
# do not remove cloned sequences
|
@@ -2,4 +2,17 @@
|
|
2
2
|
# General parameters GENOMICS WITH POSSIBLE LINKER
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginLowHighSize</li>
|
10
|
+
# Help: <li>PluginMids</li>
|
11
|
+
# Help: <li>PluginIndeterminations</li>
|
12
|
+
# Help: <li>PluginAbAdapters</li>
|
13
|
+
# Help: <li>PluginContaminants</li>
|
14
|
+
# Help: <li>PluginAmplicons</li>
|
15
|
+
# Help: <li>PluginLowQuality</li>
|
16
|
+
# Help: </ul>
|
17
|
+
|
5
18
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -2,4 +2,6 @@
|
|
2
2
|
# General parameters GENOMICS WITH POSSIBLE LINKER
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
|
6
|
+
|
5
7
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# ==============================================================
|
2
|
+
# General parameters for genomics - Illumina/SOLiD (short reads)
|
3
|
+
# ==============================================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for genomics <br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# ==============================================================
|
2
|
+
# General parameters for genomics - Illumina/SOLiD (short reads)
|
3
|
+
# ==============================================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
@@ -1,7 +1,9 @@
|
|
1
1
|
# ======================================
|
2
|
-
# General parameters
|
2
|
+
# General parameters TRANSCRIPTOMICS
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
|
6
|
+
|
5
7
|
|
6
8
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
7
9
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# ======================================
|
2
|
-
# General parameters
|
2
|
+
# General parameters TRANSCRIPTOMICS PLANTS
|
3
3
|
# ======================================
|
4
4
|
|
5
|
+
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
|
5
6
|
|
6
7
|
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
7
8
|
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# ======================================
|
2
|
+
# General parameters for transcriptomics - Illumina/SOLiD (short reads)
|
3
|
+
# ======================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
|
6
|
+
|
7
|
+
plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
8
|
+
|
9
|
+
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.50
|
5
|
+
version: 2.0.51
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-06-
|
13
|
+
date: 2012-06-20 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -97,7 +97,7 @@ dependencies:
|
|
97
97
|
requirements:
|
98
98
|
- - ">="
|
99
99
|
- !ruby/object:Gem::Version
|
100
|
-
version: 0.0.
|
100
|
+
version: 0.0.16
|
101
101
|
type: :runtime
|
102
102
|
version_requirements: *id008
|
103
103
|
- !ruby/object:Gem::Dependency
|
@@ -166,11 +166,10 @@ extra_rdoc_files:
|
|
166
166
|
- lib/seqtrimnext/templates/amplicons.txt
|
167
167
|
- lib/seqtrimnext/templates/genomics_454.txt
|
168
168
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
169
|
-
- lib/seqtrimnext/templates/genomics_illumina.txt
|
170
|
-
- lib/seqtrimnext/templates/low_quality.txt
|
171
|
-
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
169
|
+
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
170
|
+
- lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
172
171
|
- lib/seqtrimnext/templates/transcriptomics_454.txt
|
173
|
-
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
172
|
+
- lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
174
173
|
- lib/seqtrimnext/templates/transcriptomics_plants.txt
|
175
174
|
- Manifest.txt
|
176
175
|
- PostInstall.txt
|
@@ -255,7 +254,7 @@ files:
|
|
255
254
|
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
256
255
|
- lib/seqtrimnext/plugins/plugin_amplicons.rb
|
257
256
|
- lib/seqtrimnext/plugins/plugin_contaminants.rb
|
258
|
-
- lib/seqtrimnext/plugins/plugin_classify.rb
|
257
|
+
- lib/seqtrimnext/plugins/plugin_user_contaminants.rb
|
259
258
|
- lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
260
259
|
- lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
261
260
|
- lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
@@ -272,11 +271,10 @@ files:
|
|
272
271
|
- lib/seqtrimnext/templates/amplicons.txt
|
273
272
|
- lib/seqtrimnext/templates/genomics_454.txt
|
274
273
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
275
|
-
- lib/seqtrimnext/templates/genomics_illumina.txt
|
276
|
-
- lib/seqtrimnext/templates/low_quality.txt
|
277
|
-
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
274
|
+
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
275
|
+
- lib/seqtrimnext/templates/genomics_short_reads_2.txt
|
278
276
|
- lib/seqtrimnext/templates/transcriptomics_454.txt
|
279
|
-
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
|
277
|
+
- lib/seqtrimnext/templates/transcriptomics_short_reads.txt
|
280
278
|
- lib/seqtrimnext/templates/transcriptomics_plants.txt
|
281
279
|
- lib/seqtrimnext/utils/extract_samples.rb
|
282
280
|
- lib/seqtrimnext/utils/fasta2xml.rb
|
@@ -1,7 +0,0 @@
|
|
1
|
-
# ======================================
|
2
|
-
# General parameters
|
3
|
-
# ======================================
|
4
|
-
|
5
|
-
plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
|
6
|
-
|
7
|
-
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|