seqtrimnext 2.0.51 → 2.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -9,18 +9,11 @@
9
9
  # Help: <ul>
10
10
  # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
11
  # Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
12
- # Help: <li>PluginMids: trimming Roche 454 MIDs</li>
12
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
13
13
  # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
14
14
  # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
15
- # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
16
- # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
17
- # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
- # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
19
15
  # Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
20
- # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
21
- # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
16
  # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
23
- # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
24
17
  # Help: </ul>
25
18
 
26
19
  plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
@@ -5,14 +5,18 @@
5
5
  # Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
6
6
 
7
7
  # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
8
9
  # Help: <ul>
9
- # Help: <li>PluginLowHighSize</li>
10
- # Help: <li>PluginMids</li>
11
- # Help: <li>PluginIndeterminations</li>
12
- # Help: <li>PluginAbAdapters</li>
13
- # Help: <li>PluginContaminants</li>
14
- # Help: <li>PluginAmplicons</li>
15
- # Help: <li>PluginLowQuality</li>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+
15
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
16
+
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
19
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
20
  # Help: </ul>
17
21
 
18
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
22
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
@@ -4,4 +4,22 @@
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
6
6
 
7
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+
15
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
16
+
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
19
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
20
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
21
+ # Help: </ul>
22
+
23
+
24
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
25
+
@@ -3,5 +3,30 @@
3
3
  # ==============================================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for genomics <br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
+
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+
11
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
12
+
13
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
14
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
15
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
+ # Help: </ul>
17
+
18
+ plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
19
+
20
+ generate_initial_stats = false
21
+
22
+ # Minimum insert size for every trimmed sequence
23
+
24
+ min_insert_size_trimmed = 30
25
+ min_quality=20
26
+ min_insert_size_trimmed=40
27
+ min_insert_size_paired=40
28
+
29
+
30
+ # do not remove cloned sequences
31
+ remove_clonality=false
6
32
 
7
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -3,5 +3,28 @@
3
3
  # ==============================================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
+
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+
11
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
12
+
13
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
14
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
15
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
17
+ # Help: </ul>
18
+
19
+ plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
20
+
21
+ generate_initial_stats = false
22
+
23
+ # Minimum insert size for every trimmed sequence
24
+
25
+ min_insert_size_trimmed = 30
26
+
27
+
28
+ # do not remove cloned sequences
29
+ remove_clonality=false
6
30
 
7
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -0,0 +1,24 @@
1
+ # ======================================
2
+ # General parameters to trim only low quality regions
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to remove only bad quality regions<br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
11
+ # Help: </ul>
12
+
13
+ plugin_list = PluginLowQuality
14
+
15
+ # do not remove cloned sequences
16
+ remove_clonality=false
17
+
18
+ # remove amplicons containing less or equal number of sequences indicated
19
+
20
+ generate_initial_stats = false
21
+
22
+ # Minimum insert size for every trimmed sequence
23
+
24
+ min_insert_size_trimmed = 30
@@ -0,0 +1,25 @@
1
+ # ======================================
2
+ # General parameters SANGER
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to preprocess Sanger genomic data <br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
11
+ # Help: <li>PluginAdapters: trimming adapters</li>
12
+
13
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
14
+
15
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
16
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
17
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
18
+ # Help: </ul>
19
+
20
+ plugin_list = PluginIndeterminations,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality
21
+
22
+
23
+ # do not remove cloned sequences
24
+ remove_clonality=false
25
+
@@ -4,7 +4,24 @@
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
6
6
 
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
8
 
8
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
15
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
16
+
17
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
18
+
19
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
20
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
21
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
22
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
23
+ # Help: </ul>
24
+
25
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
9
26
 
10
27
  contaminants_db="contaminants.fasta cont_ribosome.fasta"
@@ -3,7 +3,28 @@
3
3
  # ======================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
6
7
 
7
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+ # Help: <ul>
9
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
10
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
11
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
12
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
13
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
14
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
15
+
16
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
17
+
18
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
19
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
20
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
21
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
+ # Help: </ul>
23
+
24
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
25
 
9
26
  contaminants_db="contaminants.fasta cont_ribosome.fasta cont_mitochondrias.fasta cont_plastids.fasta"
27
+
28
+
29
+ # do not remove cloned sequences
30
+ remove_clonality=false
@@ -3,7 +3,29 @@
3
3
  # ======================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
6
7
 
7
- plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
11
+
12
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
13
+
14
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
15
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
16
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
17
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
18
+ # Help: </ul>
19
+
20
+ plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
21
 
9
22
  contaminants_db="contaminants.fasta cont_ribosome.fasta"
23
+
24
+ generate_initial_stats = false
25
+
26
+ # Minimum insert size for every trimmed sequence
27
+
28
+ min_insert_size_trimmed = 30
29
+
30
+ # do not remove cloned sequences
31
+ remove_clonality=false
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.51'
33
+ VERSION = '2.0.52'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.51
5
+ version: 2.0.52
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-06-20 00:00:00 Z
13
+ date: 2012-07-16 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -123,16 +123,27 @@ dependencies:
123
123
  type: :runtime
124
124
  version_requirements: *id010
125
125
  - !ruby/object:Gem::Dependency
126
- name: hoe
126
+ name: scbi_headers
127
127
  prerelease: false
128
128
  requirement: &id011 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: 0.0.2
134
+ type: :runtime
135
+ version_requirements: *id011
136
+ - !ruby/object:Gem::Dependency
137
+ name: hoe
138
+ prerelease: false
139
+ requirement: &id012 !ruby/object:Gem::Requirement
129
140
  none: false
130
141
  requirements:
131
142
  - - ">="
132
143
  - !ruby/object:Gem::Version
133
144
  version: 2.8.0
134
145
  type: :development
135
- version_requirements: *id011
146
+ version_requirements: *id012
136
147
  description: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of scbi_mapreduce gem to be able to run in parallel and distributed environments. It is specially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
137
148
  email:
138
149
  - dariogf@gmail.com & alkoke@gmail.com
@@ -164,6 +175,8 @@ extensions: []
164
175
  extra_rdoc_files:
165
176
  - History.txt
166
177
  - lib/seqtrimnext/templates/amplicons.txt
178
+ - lib/seqtrimnext/templates/sanger.txt
179
+ - lib/seqtrimnext/templates/only_quality.txt
167
180
  - lib/seqtrimnext/templates/genomics_454.txt
168
181
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
169
182
  - lib/seqtrimnext/templates/genomics_short_reads.txt
@@ -200,7 +213,7 @@ files:
200
213
  - lib/seqtrimnext/actions/action_ab_adapter.rb
201
214
  - lib/seqtrimnext/actions/action_ab_far_adapter.rb
202
215
  - lib/seqtrimnext/actions/action_ab_left_adapter.rb
203
- - lib/seqtrimnext/actions/action_classify.rb
216
+ - lib/seqtrimnext/actions/action_user_contaminant.rb
204
217
  - lib/seqtrimnext/actions/action_empty_insert.rb
205
218
  - lib/seqtrimnext/actions/action_ignore_repeated.rb
206
219
  - lib/seqtrimnext/actions/action_indetermination.rb
@@ -251,7 +264,6 @@ files:
251
264
  - lib/seqtrimnext/plugins/plugin.rb
252
265
  - lib/seqtrimnext/plugins/plugin_ab_adapters.rb
253
266
  - lib/seqtrimnext/plugins/plugin_adapters.rb
254
- - lib/seqtrimnext/plugins/plugin_adapters_old.rb
255
267
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
256
268
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
257
269
  - lib/seqtrimnext/plugins/plugin_user_contaminants.rb
@@ -265,10 +277,11 @@ files:
265
277
  - lib/seqtrimnext/plugins/plugin_low_high_size.rb
266
278
  - lib/seqtrimnext/plugins/plugin_low_quality.rb
267
279
  - lib/seqtrimnext/plugins/plugin_mids.rb
268
- - lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
269
280
  - lib/seqtrimnext/plugins/plugin_short_insert.rb
270
281
  - lib/seqtrimnext/plugins/plugin_vectors.rb
271
282
  - lib/seqtrimnext/templates/amplicons.txt
283
+ - lib/seqtrimnext/templates/sanger.txt
284
+ - lib/seqtrimnext/templates/only_quality.txt
272
285
  - lib/seqtrimnext/templates/genomics_454.txt
273
286
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
274
287
  - lib/seqtrimnext/templates/genomics_short_reads.txt
@@ -1,165 +0,0 @@
1
- require "plugin"
2
-
3
- ########################################################
4
- # Author: Almudena Bocinos Rioboo
5
- #
6
- # Defines the main methods that are necessary to execute PluginAdapters
7
- # Inherit: Plugin
8
- ########################################################
9
-
10
- class PluginAdaptersOld < Plugin
11
-
12
- def get_type_adapter(p_start,p_end,seq)
13
- #if q_beg is nearer the left, add adapter action by the left,
14
- #if q_end is nearer the right, add adapter action by the right
15
- #NOTE: If the adapter is very near both the left and the right,
16
- #then the sequence isn't valid, because almost the whole sequence is adapter.
17
-
18
-
19
- v1= p_end.to_i
20
- v2= p_start.to_i
21
-
22
- # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
23
-
24
- # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
25
- if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
26
- type = "ActionLeftAdapter"
27
-
28
- else
29
- type = "ActionRightAdapter"
30
-
31
- end
32
- return type
33
- end
34
-
35
-
36
- def cut_by_right(adapter,seq)
37
-
38
- left_size = adapter.q_beg-seq.insert_start+1
39
- right_size = seq.insert_end-adapter.q_end+1
40
- left_size=0 if (left_size<0)
41
- right_size=0 if (right_size<0)
42
-
43
- return (left_size>(right_size/2).to_i)
44
-
45
- end
46
-
47
- #Begins the plugin's execution: blasts all the sequences in "seqs" and processes each sequence with its own blast query result
48
- def execute(seqs)
49
- blasts= do_blasts(seqs)
50
-
51
- seqs.each_with_index do |s,i|
52
- exec_seq(s,blasts.querys[i])
53
- end
54
- end
55
-
56
- def do_blasts(seqs)
57
- # blast the sequences against the adapters database (adapters_db) using blastn-short
58
- blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
59
- $LOG.debug('BLAST:'+blast.get_blast_cmd)
60
-
61
- fastas=[]
62
-
63
- seqs.each do |seq|
64
- fastas.push ">"+seq.seq_name
65
- fastas.push seq.seq_fasta
66
- end
67
-
68
- # fastas=fastas.join("\n")
69
-
70
- blast_table_results = blast.do_blast(fastas)
71
-
72
- # puts blast_table_results.inspect
73
-
74
- return blast_table_results
75
- end
76
-
77
-
78
- def exec_seq(seq,blast_query)
79
- if blast_query.query_id != seq.seq_name
80
- raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
81
- end
82
-
83
- $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
84
-
85
-
86
- # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
87
-
88
- # blast with only one sequence, no with many sequences from a database
89
- #---------------------------------------------------------------------
90
-
91
- # blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
92
-
93
- #blast_table_results = BlastTableResult.new(res)
94
-
95
- # blast_table_results.inspect
96
-
97
- adapters=[]
98
- # blast_table_results.querys.each do |query| # first round to save adapters without overlap
99
- merge_hits(blast_query,adapters)
100
- # end
101
-
102
- begin
103
- adapters2=adapters # second round to save adapters without overlap
104
- adapters = []
105
- merge_hits(adapters2,adapters)
106
- end until (adapters2.count == adapters.count)
107
-
108
- actions=[]
109
- adapter_size=0
110
- # @stats['adapter_size']={}
111
- adapters.each do |ad| # adds the correspondent action to the sequence
112
-
113
- type = get_type_adapter(ad.q_beg,ad.q_end,seq)
114
- a = seq.new_action(ad.q_beg,ad.q_end,type)
115
- # puts " state left_action #{a.left_action} right_action #{a.right_action}"
116
-
117
-
118
- adapter_size=ad.q_end-ad.q_beg+1
119
-
120
- if cut_by_right(ad,seq)
121
-
122
- # puts "action right end1 #{seq.insert_end}"
123
-
124
- a.right_action=true #mark rigth action to get the left insert
125
- else
126
-
127
- # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
128
-
129
- a.left_action = true #mark left action to get the right insert
130
-
131
- end
132
-
133
- a.message = ad.subject_id
134
- a.reversed = ad.reversed
135
- actions.push a
136
-
137
- # @stats[:adapter_size]={adapter_size => 1}
138
- add_stats('adapter_size',adapter_size)
139
-
140
- end
141
- seq.add_actions(actions)
142
- #
143
- end
144
-
145
- #Returns an array with the errors caused by missing parameters
146
- def self.check_params(params)
147
- errors=[]
148
-
149
- comment='Blast E-value used as cut-off when searching for adapters or primers'
150
- default_value = 1e-6
151
- params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
152
-
153
- comment='Minimum required identity (%) for a reliable adapter'
154
- default_value = 95
155
- params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
156
-
157
- comment='Path for adapter database'
158
- default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
159
- params.check_param(errors,'adapters_db','DB',default_value,comment)
160
-
161
- return errors
162
- end
163
-
164
-
165
- end