seqtrimnext 2.0.50 → 2.0.51

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.51 2012-06-20
2
+
3
+ Added cont_viruses database
4
+
1
5
  === 2.0.50 2012-06-13
2
6
 
3
7
  Added classification plugin
data/Manifest.txt CHANGED
@@ -78,7 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
78
78
  lib/seqtrimnext/plugins/plugin_adapters_old.rb
79
79
  lib/seqtrimnext/plugins/plugin_amplicons.rb
80
80
  lib/seqtrimnext/plugins/plugin_contaminants.rb
81
- lib/seqtrimnext/plugins/plugin_classify.rb
81
+ lib/seqtrimnext/plugins/plugin_user_contaminants.rb
82
82
  lib/seqtrimnext/plugins/plugin_extract_inserts.rb
83
83
  lib/seqtrimnext/plugins/plugin_find_poly_at.rb
84
84
  lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -95,11 +95,10 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
95
95
  lib/seqtrimnext/templates/amplicons.txt
96
96
  lib/seqtrimnext/templates/genomics_454.txt
97
97
  lib/seqtrimnext/templates/genomics_454_with_paired.txt
98
- lib/seqtrimnext/templates/genomics_illumina.txt
99
- lib/seqtrimnext/templates/low_quality.txt
100
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
98
+ lib/seqtrimnext/templates/genomics_short_reads.txt
99
+ lib/seqtrimnext/templates/genomics_short_reads_2.txt
101
100
  lib/seqtrimnext/templates/transcriptomics_454.txt
102
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
101
+ lib/seqtrimnext/templates/transcriptomics_short_reads.txt
103
102
  lib/seqtrimnext/templates/transcriptomics_plants.txt
104
103
  lib/seqtrimnext/utils/extract_samples.rb
105
104
  lib/seqtrimnext/utils/fasta2xml.rb
data/Rakefile CHANGED
@@ -24,7 +24,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
24
24
  self.extra_deps << ['scbi_blast','>=0.0.34']
25
25
  self.extra_deps << ['scbi_mapreduce','>=0.0.38']
26
26
  self.extra_deps << ['scbi_fasta','>=0.1.7']
27
- self.extra_deps << ['scbi_fastq','>=0.0.15']
27
+ self.extra_deps << ['scbi_fastq','>=0.0.16']
28
28
  self.extra_deps << ['scbi_plot','>=0.0.6']
29
29
  self.extra_deps << ['scbi_math','>=0.0.1']
30
30
 
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.50'
33
+ VERSION = '2.0.51'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
@@ -6,7 +6,7 @@ class InstallDatabase
6
6
  def initialize(type,db_path)
7
7
 
8
8
 
9
- types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome']
9
+ types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome','cont_viruses']
10
10
 
11
11
  if types.include?(type)
12
12
 
@@ -38,7 +38,6 @@ class PluginRemAditArtifacts < Plugin
38
38
  while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
39
39
  first += 6
40
40
  seq2.slice!(0..5)
41
-
42
41
  end
43
42
 
44
43
 
@@ -8,7 +8,7 @@ require "make_blast_db"
8
8
  # Inherit: Plugin
9
9
  ########################################################
10
10
 
11
- class PluginClassify < Plugin
11
+ class PluginUserContaminants < Plugin
12
12
 
13
13
 
14
14
  MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
@@ -47,7 +47,7 @@ class PluginClassify < Plugin
47
47
  # TODO - Culling limit = 2 because blast fails with this command when it is passed cl=1 and dust=no
48
48
  # and a low-complexity sequence as input
49
49
 
50
- blast = BatchBlast.new("-db #{@params.get_param('classify_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
50
+ blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
51
51
 
52
52
  $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
53
53
 
@@ -133,7 +133,7 @@ class PluginClassify < Plugin
133
133
 
134
134
  comment='Path for classify database'
135
135
  default_value = File.join($FORMATTED_DB_PATH,'classify.fasta')
136
- params.check_param(errors,'classify_db','DB',default_value,comment)
136
+ params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
137
137
 
138
138
 
139
139
  return errors
@@ -2,6 +2,27 @@
2
2
  # General parameters to extract Amplicons
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to get Amplicons<br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
12
+ # Help: <li>PluginMids: trimming Roche 454 MIDs</li>
13
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
14
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
15
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
16
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
19
+ # Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
20
+ # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
21
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
23
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
24
+ # Help: </ul>
25
+
5
26
  plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
6
27
 
7
28
  # do not remove cloned sequences
@@ -2,4 +2,17 @@
2
2
  # General parameters GENOMICS WITH POSSIBLE LINKER
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+ # Help: <ul>
9
+ # Help: <li>PluginLowHighSize</li>
10
+ # Help: <li>PluginMids</li>
11
+ # Help: <li>PluginIndeterminations</li>
12
+ # Help: <li>PluginAbAdapters</li>
13
+ # Help: <li>PluginContaminants</li>
14
+ # Help: <li>PluginVectors</li>
15
+ # Help: <li>PluginLowQuality</li>
16
+ # Help: </ul>
17
+
5
18
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
@@ -2,4 +2,6 @@
2
2
  # General parameters GENOMICS WITH POSSIBLE LINKER
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
6
+
5
7
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
@@ -0,0 +1,7 @@
1
+ # ==============================================================
2
+ # General parameters for genomics - Illumina/SOLiD (short reads)
3
+ # ==============================================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for genomics <br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -0,0 +1,7 @@
1
+ # ==============================================================
2
+ # General parameters for genomics - Illumina/SOLiD (short reads)
3
+ # ==============================================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -1,7 +1,9 @@
1
1
  # ======================================
2
- # General parameters
2
+ # General parameters TRANSCRIPTOMICS
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
6
+
5
7
 
6
8
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
7
9
 
@@ -1,7 +1,8 @@
1
1
  # ======================================
2
- # General parameters
2
+ # General parameters TRANSCRIPTOMICS PLANTS
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
5
6
 
6
7
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
7
8
 
@@ -0,0 +1,9 @@
1
+ # ======================================
2
+ # General parameters for transcriptomics - Illumina/SOLiD (short reads)
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+
9
+ contaminants_db="contaminants.fasta cont_ribosome.fasta"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.50
5
+ version: 2.0.51
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-06-13 00:00:00 Z
13
+ date: 2012-06-20 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -97,7 +97,7 @@ dependencies:
97
97
  requirements:
98
98
  - - ">="
99
99
  - !ruby/object:Gem::Version
100
- version: 0.0.15
100
+ version: 0.0.16
101
101
  type: :runtime
102
102
  version_requirements: *id008
103
103
  - !ruby/object:Gem::Dependency
@@ -166,11 +166,10 @@ extra_rdoc_files:
166
166
  - lib/seqtrimnext/templates/amplicons.txt
167
167
  - lib/seqtrimnext/templates/genomics_454.txt
168
168
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
169
- - lib/seqtrimnext/templates/genomics_illumina.txt
170
- - lib/seqtrimnext/templates/low_quality.txt
171
- - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
169
+ - lib/seqtrimnext/templates/genomics_short_reads.txt
170
+ - lib/seqtrimnext/templates/genomics_short_reads_2.txt
172
171
  - lib/seqtrimnext/templates/transcriptomics_454.txt
173
- - lib/seqtrimnext/templates/transcriptomics_illumina.txt
172
+ - lib/seqtrimnext/templates/transcriptomics_short_reads.txt
174
173
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
175
174
  - Manifest.txt
176
175
  - PostInstall.txt
@@ -255,7 +254,7 @@ files:
255
254
  - lib/seqtrimnext/plugins/plugin_adapters_old.rb
256
255
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
257
256
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
258
- - lib/seqtrimnext/plugins/plugin_classify.rb
257
+ - lib/seqtrimnext/plugins/plugin_user_contaminants.rb
259
258
  - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
260
259
  - lib/seqtrimnext/plugins/plugin_find_poly_at.rb
261
260
  - lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -272,11 +271,10 @@ files:
272
271
  - lib/seqtrimnext/templates/amplicons.txt
273
272
  - lib/seqtrimnext/templates/genomics_454.txt
274
273
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
275
- - lib/seqtrimnext/templates/genomics_illumina.txt
276
- - lib/seqtrimnext/templates/low_quality.txt
277
- - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
274
+ - lib/seqtrimnext/templates/genomics_short_reads.txt
275
+ - lib/seqtrimnext/templates/genomics_short_reads_2.txt
278
276
  - lib/seqtrimnext/templates/transcriptomics_454.txt
279
- - lib/seqtrimnext/templates/transcriptomics_illumina.txt
277
+ - lib/seqtrimnext/templates/transcriptomics_short_reads.txt
280
278
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
281
279
  - lib/seqtrimnext/utils/extract_samples.rb
282
280
  - lib/seqtrimnext/utils/fasta2xml.rb
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS illumina
3
- # ======================================
4
-
5
- plugin_list = PluginLowHighSize,PluginIndeterminations,PluginContaminants,PluginLowQuality
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS WITH POSSIBLE LINKER
3
- # ======================================
4
-
5
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS WITH POSSIBLE LINKER
3
- # ======================================
4
-
5
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -1,7 +0,0 @@
1
- # ======================================
2
- # General parameters
3
- # ======================================
4
-
5
- plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
6
-
7
- contaminants_db="contaminants.fasta cont_ribosome.fasta"