seqtrimnext 2.0.50 → 2.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.51 2012-06-20
2
+
3
+ Added cont_viruses database
4
+
1
5
  === 2.0.50 2012-06-13
2
6
 
3
7
  Added classification plugin
data/Manifest.txt CHANGED
@@ -78,7 +78,7 @@ lib/seqtrimnext/plugins/plugin_adapters.rb
78
78
  lib/seqtrimnext/plugins/plugin_adapters_old.rb
79
79
  lib/seqtrimnext/plugins/plugin_amplicons.rb
80
80
  lib/seqtrimnext/plugins/plugin_contaminants.rb
81
- lib/seqtrimnext/plugins/plugin_classify.rb
81
+ lib/seqtrimnext/plugins/plugin_user_contaminants.rb
82
82
  lib/seqtrimnext/plugins/plugin_extract_inserts.rb
83
83
  lib/seqtrimnext/plugins/plugin_find_poly_at.rb
84
84
  lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -95,11 +95,10 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
95
95
  lib/seqtrimnext/templates/amplicons.txt
96
96
  lib/seqtrimnext/templates/genomics_454.txt
97
97
  lib/seqtrimnext/templates/genomics_454_with_paired.txt
98
- lib/seqtrimnext/templates/genomics_illumina.txt
99
- lib/seqtrimnext/templates/low_quality.txt
100
- lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
98
+ lib/seqtrimnext/templates/genomics_short_reads.txt
99
+ lib/seqtrimnext/templates/genomics_short_reads_2.txt
101
100
  lib/seqtrimnext/templates/transcriptomics_454.txt
102
- lib/seqtrimnext/templates/transcriptomics_illumina.txt
101
+ lib/seqtrimnext/templates/transcriptomics_short_reads.txt
103
102
  lib/seqtrimnext/templates/transcriptomics_plants.txt
104
103
  lib/seqtrimnext/utils/extract_samples.rb
105
104
  lib/seqtrimnext/utils/fasta2xml.rb
data/Rakefile CHANGED
@@ -24,7 +24,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
24
24
  self.extra_deps << ['scbi_blast','>=0.0.34']
25
25
  self.extra_deps << ['scbi_mapreduce','>=0.0.38']
26
26
  self.extra_deps << ['scbi_fasta','>=0.1.7']
27
- self.extra_deps << ['scbi_fastq','>=0.0.15']
27
+ self.extra_deps << ['scbi_fastq','>=0.0.16']
28
28
  self.extra_deps << ['scbi_plot','>=0.0.6']
29
29
  self.extra_deps << ['scbi_math','>=0.0.1']
30
30
 
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.50'
33
+ VERSION = '2.0.51'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
@@ -6,7 +6,7 @@ class InstallDatabase
6
6
  def initialize(type,db_path)
7
7
 
8
8
 
9
- types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome']
9
+ types=['core','cont_bacteria','cont_fungi','cont_mitochondrias','cont_plastids','cont_ribosome','cont_viruses']
10
10
 
11
11
  if types.include?(type)
12
12
 
@@ -38,7 +38,6 @@ class PluginRemAditArtifacts < Plugin
38
38
  while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
39
39
  first += 6
40
40
  seq2.slice!(0..5)
41
-
42
41
  end
43
42
 
44
43
 
@@ -8,7 +8,7 @@ require "make_blast_db"
8
8
  # Inherit: Plugin
9
9
  ########################################################
10
10
 
11
- class PluginClassify < Plugin
11
+ class PluginUserContaminants < Plugin
12
12
 
13
13
 
14
14
  MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
@@ -47,7 +47,7 @@ class PluginClassify < Plugin
47
47
  # TODO - Culling limit = 2 porque el blast falla con este comando cuando se le pasa cl=1 y dust=no
48
48
  # y una secuencia de baja complejidad como entrada
49
49
 
50
- blast = BatchBlast.new("-db #{@params.get_param('classify_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
50
+ blast = BatchBlast.new("-db #{@params.get_param('user_contaminant_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_classify')} -perc_identity #{@params.get_param('blast_percent_classify')} -culling_limit 1") #get classify -max_target_seqs #{MAX_TARGETS_SEQS}
51
51
 
52
52
  $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
53
53
 
@@ -133,7 +133,7 @@ class PluginClassify < Plugin
133
133
 
134
134
  comment='Path for classify database'
135
135
  default_value = File.join($FORMATTED_DB_PATH,'classify.fasta')
136
- params.check_param(errors,'classify_db','DB',default_value,comment)
136
+ params.check_param(errors,'user_contaminant_db','DB',default_value,comment)
137
137
 
138
138
 
139
139
  return errors
@@ -2,6 +2,27 @@
2
2
  # General parameters to extract Amplicons
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to get Amplicons<br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
12
+ # Help: <li>PluginMids: trimming Roche 454 MIDs</li>
13
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
14
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
15
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
16
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database, saving them in a separate file</li>
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
19
+ # Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
20
+ # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
21
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
23
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
24
+ # Help: </ul>
25
+
5
26
  plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
6
27
 
7
28
  # do not remove cloned sequences
@@ -2,4 +2,17 @@
2
2
  # General parameters GENOMICS WITH POSSIBLE LINKER
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+ # Help: <ul>
9
+ # Help: <li>PluginLowHighSize</li>
10
+ # Help: <li>PluginMids</li>
11
+ # Help: <li>PluginIndeterminations</li>
12
+ # Help: <li>PluginAbAdapters</li>
13
+ # Help: <li>PluginContaminants</li>
14
+ # Help: <li>PluginVectors</li>
15
+ # Help: <li>PluginLowQuality</li>
16
+ # Help: </ul>
17
+
5
18
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
@@ -2,4 +2,6 @@
2
2
  # General parameters GENOMICS WITH POSSIBLE LINKER
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
6
+
5
7
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
@@ -0,0 +1,7 @@
1
+ # ==============================================================
2
+ # General parameters for genomics - Illumina/SOLiD (short reads)
3
+ # ==============================================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for genomics <br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -0,0 +1,7 @@
1
+ # ==============================================================
2
+ # General parameters for genomics - Illumina/SOLiD (short reads)
3
+ # ==============================================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -1,7 +1,9 @@
1
1
  # ======================================
2
- # General parameters
2
+ # General parameters TRANSCRIPTOMICS
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
6
+
5
7
 
6
8
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
7
9
 
@@ -1,7 +1,8 @@
1
1
  # ======================================
2
- # General parameters
2
+ # General parameters TRANSCRIPTOMICS PLANTS
3
3
  # ======================================
4
4
 
5
+ # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
5
6
 
6
7
  plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
7
8
 
@@ -0,0 +1,9 @@
1
+ # ======================================
2
+ # General parameters for transcriptomics - Illumina/SOLiD (short reads)
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
6
+
7
+ plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+
9
+ contaminants_db="contaminants.fasta cont_ribosome.fasta"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.50
5
+ version: 2.0.51
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-06-13 00:00:00 Z
13
+ date: 2012-06-20 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -97,7 +97,7 @@ dependencies:
97
97
  requirements:
98
98
  - - ">="
99
99
  - !ruby/object:Gem::Version
100
- version: 0.0.15
100
+ version: 0.0.16
101
101
  type: :runtime
102
102
  version_requirements: *id008
103
103
  - !ruby/object:Gem::Dependency
@@ -166,11 +166,10 @@ extra_rdoc_files:
166
166
  - lib/seqtrimnext/templates/amplicons.txt
167
167
  - lib/seqtrimnext/templates/genomics_454.txt
168
168
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
169
- - lib/seqtrimnext/templates/genomics_illumina.txt
170
- - lib/seqtrimnext/templates/low_quality.txt
171
- - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
169
+ - lib/seqtrimnext/templates/genomics_short_reads.txt
170
+ - lib/seqtrimnext/templates/genomics_short_reads_2.txt
172
171
  - lib/seqtrimnext/templates/transcriptomics_454.txt
173
- - lib/seqtrimnext/templates/transcriptomics_illumina.txt
172
+ - lib/seqtrimnext/templates/transcriptomics_short_reads.txt
174
173
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
175
174
  - Manifest.txt
176
175
  - PostInstall.txt
@@ -255,7 +254,7 @@ files:
255
254
  - lib/seqtrimnext/plugins/plugin_adapters_old.rb
256
255
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
257
256
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
258
- - lib/seqtrimnext/plugins/plugin_classify.rb
257
+ - lib/seqtrimnext/plugins/plugin_user_contaminants.rb
259
258
  - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
260
259
  - lib/seqtrimnext/plugins/plugin_find_poly_at.rb
261
260
  - lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
@@ -272,11 +271,10 @@ files:
272
271
  - lib/seqtrimnext/templates/amplicons.txt
273
272
  - lib/seqtrimnext/templates/genomics_454.txt
274
273
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
275
- - lib/seqtrimnext/templates/genomics_illumina.txt
276
- - lib/seqtrimnext/templates/low_quality.txt
277
- - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
274
+ - lib/seqtrimnext/templates/genomics_short_reads.txt
275
+ - lib/seqtrimnext/templates/genomics_short_reads_2.txt
278
276
  - lib/seqtrimnext/templates/transcriptomics_454.txt
279
- - lib/seqtrimnext/templates/transcriptomics_illumina.txt
277
+ - lib/seqtrimnext/templates/transcriptomics_short_reads.txt
280
278
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
281
279
  - lib/seqtrimnext/utils/extract_samples.rb
282
280
  - lib/seqtrimnext/utils/fasta2xml.rb
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS illumina
3
- # ======================================
4
-
5
- plugin_list = PluginLowHighSize,PluginIndeterminations,PluginContaminants,PluginLowQuality
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS WITH POSSIBLE LINKER
3
- # ======================================
4
-
5
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -1,5 +0,0 @@
1
- # ======================================
2
- # General parameters GENOMICS WITH POSSIBLE LINKER
3
- # ======================================
4
-
5
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -1,7 +0,0 @@
1
- # ======================================
2
- # General parameters
3
- # ======================================
4
-
5
- plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
6
-
7
- contaminants_db="contaminants.fasta cont_ribosome.fasta"