seqtrimnext 2.0.51 → 2.0.52

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -9,18 +9,11 @@
9
9
  # Help: <ul>
10
10
  # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
11
  # Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
12
- # Help: <li>PluginMids: trimming Roche 454 MIDs</li>
12
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
13
13
  # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
14
14
  # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
15
- # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
16
- # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
17
- # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
- # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
19
15
  # Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
20
- # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
21
- # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
16
  # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
23
- # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
24
17
  # Help: </ul>
25
18
 
26
19
  plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
@@ -5,14 +5,18 @@
5
5
  # Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
6
6
 
7
7
  # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
8
9
  # Help: <ul>
9
- # Help: <li>PluginLowHighSize</li>
10
- # Help: <li>PluginMids</li>
11
- # Help: <li>PluginIndeterminations</li>
12
- # Help: <li>PluginAbAdapters</li>
13
- # Help: <li>PluginContaminants</li>
14
- # Help: <li>PluginAmplicons</li>
15
- # Help: <li>PluginLowQuality</li>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+
15
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
16
+
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
19
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
20
  # Help: </ul>
17
21
 
18
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
22
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
@@ -4,4 +4,22 @@
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
6
6
 
7
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+
15
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
16
+
17
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
18
+ # Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
19
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
20
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
21
+ # Help: </ul>
22
+
23
+
24
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
25
+
@@ -3,5 +3,30 @@
3
3
  # ==============================================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for genomics <br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
+
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+
11
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
12
+
13
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
14
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
15
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
+ # Help: </ul>
17
+
18
+ plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
19
+
20
+ generate_initial_stats = false
21
+
22
+ # Minimum insert size for every trimmed sequence
23
+
24
+ min_insert_size_trimmed = 30
25
+ min_quality=20
26
+ min_insert_size_trimmed=40
27
+ min_insert_size_paired=40
28
+
29
+
30
+ # do not remove cloned sequences
31
+ remove_clonality=false
6
32
 
7
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
@@ -3,5 +3,28 @@
3
3
  # ==============================================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
+
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+
11
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
12
+
13
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
14
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
15
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
16
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
17
+ # Help: </ul>
18
+
19
+ plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
20
+
21
+ generate_initial_stats = false
22
+
23
+ # Minimum insert size for every trimmed sequence
24
+
25
+ min_insert_size_trimmed = 30
26
+
27
+
28
+ # do not remove cloned sequences
29
+ remove_clonality=false
6
30
 
7
- plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
@@ -0,0 +1,24 @@
1
+ # ======================================
2
+ # General parameters to trim only low quality regions
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to remove only bad quality regions<br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
11
+ # Help: </ul>
12
+
13
+ plugin_list = PluginLowQuality
14
+
15
+ # do not remove cloned sequences
16
+ remove_clonality=false
17
+
18
+ # remove amplicons containing less or equal number of sequences indicated
19
+
20
+ generate_initial_stats = false
21
+
22
+ # Minimum insert size for every trimmed sequence
23
+
24
+ min_insert_size_trimmed = 30
@@ -0,0 +1,25 @@
1
+ # ======================================
2
+ # General parameters SANGER
3
+ # ======================================
4
+
5
+ # Help: <br/>This template is used to preprocess Sanger genomic data <br/>
6
+
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
8
+
9
+ # Help: <ul>
10
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
11
+ # Help: <li>PluginAdapters: trimming adapters</li>
12
+
13
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
14
+
15
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
16
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
17
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
18
+ # Help: </ul>
19
+
20
+ plugin_list = PluginIndeterminations,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality
21
+
22
+
23
+ # do not remove cloned sequences
24
+ remove_clonality=false
25
+
@@ -4,7 +4,24 @@
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
6
6
 
7
+ # Help: <br/><b>Plugin list and application order:</b><br/>
7
8
 
8
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
9
+ # Help: <ul>
10
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
11
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
12
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
13
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
14
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
15
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
16
+
17
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
18
+
19
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
20
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
21
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
22
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
23
+ # Help: </ul>
24
+
25
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
9
26
 
10
27
  contaminants_db="contaminants.fasta cont_ribosome.fasta"
@@ -3,7 +3,28 @@
3
3
  # ======================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
6
7
 
7
- plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+ # Help: <ul>
9
+ # Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
10
+ # Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
11
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
12
+ # Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
13
+ # Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
14
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
15
+
16
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
17
+
18
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
19
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
20
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
21
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
22
+ # Help: </ul>
23
+
24
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
25
 
9
26
  contaminants_db="contaminants.fasta cont_ribosome.fasta cont_mitochondrias.fasta cont_plastids.fasta"
27
+
28
+
29
+ # do not remove cloned sequences
30
+ remove_clonality=false
@@ -3,7 +3,29 @@
3
3
  # ======================================
4
4
 
5
5
  # Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
6
+ # Help: <br/><b>Plugin list and application order:</b><br/>
6
7
 
7
- plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
+ # Help: <ul>
9
+ # Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
10
+ # Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
11
+
12
+ # Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
13
+
14
+ # Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
15
+ # Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
16
+ # Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
17
+ # Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
18
+ # Help: </ul>
19
+
20
+ plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
8
21
 
9
22
  contaminants_db="contaminants.fasta cont_ribosome.fasta"
23
+
24
+ generate_initial_stats = false
25
+
26
+ # Minimum insert size for every trimmed sequence
27
+
28
+ min_insert_size_trimmed = 30
29
+
30
+ # do not remove cloned sequences
31
+ remove_clonality=false
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.51'
33
+ VERSION = '2.0.52'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.51
5
+ version: 2.0.52
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-06-20 00:00:00 Z
13
+ date: 2012-07-16 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -123,16 +123,27 @@ dependencies:
123
123
  type: :runtime
124
124
  version_requirements: *id010
125
125
  - !ruby/object:Gem::Dependency
126
- name: hoe
126
+ name: scbi_headers
127
127
  prerelease: false
128
128
  requirement: &id011 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: 0.0.2
134
+ type: :runtime
135
+ version_requirements: *id011
136
+ - !ruby/object:Gem::Dependency
137
+ name: hoe
138
+ prerelease: false
139
+ requirement: &id012 !ruby/object:Gem::Requirement
129
140
  none: false
130
141
  requirements:
131
142
  - - ">="
132
143
  - !ruby/object:Gem::Version
133
144
  version: 2.8.0
134
145
  type: :development
135
- version_requirements: *id011
146
+ version_requirements: *id012
136
147
  description: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of scbi_mapreduce gem to be able to run in parallel and distributed environments. It is specially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
137
148
  email:
138
149
  - dariogf@gmail.com & alkoke@gmail.com
@@ -164,6 +175,8 @@ extensions: []
164
175
  extra_rdoc_files:
165
176
  - History.txt
166
177
  - lib/seqtrimnext/templates/amplicons.txt
178
+ - lib/seqtrimnext/templates/sanger.txt
179
+ - lib/seqtrimnext/templates/only_quality.txt
167
180
  - lib/seqtrimnext/templates/genomics_454.txt
168
181
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
169
182
  - lib/seqtrimnext/templates/genomics_short_reads.txt
@@ -200,7 +213,7 @@ files:
200
213
  - lib/seqtrimnext/actions/action_ab_adapter.rb
201
214
  - lib/seqtrimnext/actions/action_ab_far_adapter.rb
202
215
  - lib/seqtrimnext/actions/action_ab_left_adapter.rb
203
- - lib/seqtrimnext/actions/action_classify.rb
216
+ - lib/seqtrimnext/actions/action_user_contaminant.rb
204
217
  - lib/seqtrimnext/actions/action_empty_insert.rb
205
218
  - lib/seqtrimnext/actions/action_ignore_repeated.rb
206
219
  - lib/seqtrimnext/actions/action_indetermination.rb
@@ -251,7 +264,6 @@ files:
251
264
  - lib/seqtrimnext/plugins/plugin.rb
252
265
  - lib/seqtrimnext/plugins/plugin_ab_adapters.rb
253
266
  - lib/seqtrimnext/plugins/plugin_adapters.rb
254
- - lib/seqtrimnext/plugins/plugin_adapters_old.rb
255
267
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
256
268
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
257
269
  - lib/seqtrimnext/plugins/plugin_user_contaminants.rb
@@ -265,10 +277,11 @@ files:
265
277
  - lib/seqtrimnext/plugins/plugin_low_high_size.rb
266
278
  - lib/seqtrimnext/plugins/plugin_low_quality.rb
267
279
  - lib/seqtrimnext/plugins/plugin_mids.rb
268
- - lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
269
280
  - lib/seqtrimnext/plugins/plugin_short_insert.rb
270
281
  - lib/seqtrimnext/plugins/plugin_vectors.rb
271
282
  - lib/seqtrimnext/templates/amplicons.txt
283
+ - lib/seqtrimnext/templates/sanger.txt
284
+ - lib/seqtrimnext/templates/only_quality.txt
272
285
  - lib/seqtrimnext/templates/genomics_454.txt
273
286
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
274
287
  - lib/seqtrimnext/templates/genomics_short_reads.txt
@@ -1,165 +0,0 @@
1
- require "plugin"
2
-
3
- ########################################################
4
- # Author: Almudena Bocinos Rioboo
5
- #
6
- # Defines the main methods that are necessary to execute PluginAdapters
7
- # Inherit: Plugin
8
- ########################################################
9
-
10
- class PluginAdaptersOld < Plugin
11
-
12
- def get_type_adapter(p_start,p_end,seq)
13
- #if q_beg is nearer the left, add adapter action by the left,
14
- #if q_end is nearer the right, add adapter action by the right
15
- #NOTE: If the adapter is very near both the left and the right,
16
- #then the sequence isn't valid, because almost the whole sequence is adapter.
17
-
18
-
19
- v1= p_end.to_i
20
- v2= p_start.to_i
21
-
22
- # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
23
-
24
- # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
25
- if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
26
- type = "ActionLeftAdapter"
27
-
28
- else
29
- type = "ActionRightAdapter"
30
-
31
- end
32
- return type
33
- end
34
-
35
-
36
- def cut_by_right(adapter,seq)
37
-
38
- left_size = adapter.q_beg-seq.insert_start+1
39
- right_size = seq.insert_end-adapter.q_end+1
40
- left_size=0 if (left_size<0)
41
- right_size=0 if (right_size<0)
42
-
43
- return (left_size>(right_size/2).to_i)
44
-
45
- end
46
-
47
- #Begins the plugin's execution: BLASTs the sequences in "seqs" and processes each one with its BLAST result
48
- def execute(seqs)
49
- blasts= do_blasts(seqs)
50
-
51
- seqs.each_with_index do |s,i|
52
- exec_seq(s,blasts.querys[i])
53
- end
54
- end
55
-
56
- def do_blasts(seqs)
57
- # BLAST the sequences against the adapters database (adapters_db parameter)
58
- blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
59
- $LOG.debug('BLAST:'+blast.get_blast_cmd)
60
-
61
- fastas=[]
62
-
63
- seqs.each do |seq|
64
- fastas.push ">"+seq.seq_name
65
- fastas.push seq.seq_fasta
66
- end
67
-
68
- # fastas=fastas.join("\n")
69
-
70
- blast_table_results = blast.do_blast(fastas)
71
-
72
- # puts blast_table_results.inspect
73
-
74
- return blast_table_results
75
- end
76
-
77
-
78
- def exec_seq(seq,blast_query)
79
- if blast_query.query_id != seq.seq_name
80
- raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
81
- end
82
-
83
- $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
84
-
85
-
86
- # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
87
-
88
- # blast with only one sequence, no with many sequences from a database
89
- #---------------------------------------------------------------------
90
-
91
- # blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
92
-
93
- #blast_table_results = BlastTableResult.new(res)
94
-
95
- # blast_table_results.inspect
96
-
97
- adapters=[]
98
- # blast_table_results.querys.each do |query| # first round to save adapters without overlap
99
- merge_hits(blast_query,adapters)
100
- # end
101
-
102
- begin
103
- adapters2=adapters # second round to save adapters without overlap
104
- adapters = []
105
- merge_hits(adapters2,adapters)
106
- end until (adapters2.count == adapters.count)
107
-
108
- actions=[]
109
- adapter_size=0
110
- # @stats['adapter_size']={}
111
- adapters.each do |ad| # adds the correspondent action to the sequence
112
-
113
- type = get_type_adapter(ad.q_beg,ad.q_end,seq)
114
- a = seq.new_action(ad.q_beg,ad.q_end,type)
115
- # puts " state left_action #{a.left_action} right_action #{a.right_action}"
116
-
117
-
118
- adapter_size=ad.q_end-ad.q_beg+1
119
-
120
- if cut_by_right(ad,seq)
121
-
122
- # puts "action right end1 #{seq.insert_end}"
123
-
124
- a.right_action=true #mark right action to get the left insert
125
- else
126
-
127
- # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
128
-
129
- a.left_action = true #mark left action to get the right insert
130
-
131
- end
132
-
133
- a.message = ad.subject_id
134
- a.reversed = ad.reversed
135
- actions.push a
136
-
137
- # @stats[:adapter_size]={adapter_size => 1}
138
- add_stats('adapter_size',adapter_size)
139
-
140
- end
141
- seq.add_actions(actions)
142
- #
143
- end
144
-
145
- #Returns an array with the errors caused by missing parameters
146
- def self.check_params(params)
147
- errors=[]
148
-
149
- comment='Blast E-value used as cut-off when searching for adapters or primers'
150
- default_value = 1e-6
151
- params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
152
-
153
- comment='Minimum required identity (%) for a reliable adapter'
154
- default_value = 95
155
- params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
156
-
157
- comment='Path for adapter database'
158
- default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
159
- params.check_param(errors,'adapters_db','DB',default_value,comment)
160
-
161
- return errors
162
- end
163
-
164
-
165
- end