seqtrimnext 2.0.51 → 2.0.52
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/Manifest.txt +3 -3
- data/README.rdoc +18 -3
- data/Rakefile +2 -1
- data/bin/parse_params.rb +5 -1
- data/bin/seqtrimnext +53 -21
- data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
- data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
- data/lib/seqtrimnext/classes/params.rb +109 -123
- data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
- data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
- data/lib/seqtrimnext/classes/sequence.rb +2 -2
- data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
- data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
- data/lib/seqtrimnext/plugins/plugin.rb +42 -12
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
- data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
- data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
- data/lib/seqtrimnext/templates/amplicons.txt +1 -8
- data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
- data/lib/seqtrimnext/templates/only_quality.txt +24 -0
- data/lib/seqtrimnext/templates/sanger.txt +25 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +20 -7
- data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -9,18 +9,11 @@
|
|
9
9
|
# Help: <ul>
|
10
10
|
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
11
|
# Help: <li>PluginKey: trimming Roche 454 sequencing keys (typically 4 first nucleotides)</li>
|
12
|
-
# Help: <li>PluginMids: trimming Roche 454 MIDs</li>
|
12
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
|
13
13
|
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
14
14
|
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
15
|
-
# Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
|
16
|
-
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
17
|
-
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
18
|
-
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
19
15
|
# Help: <li>PluginAmplicons: getting sequences contained between two primers. Sequences with less than two primers are rejected. </li>
|
20
|
-
# Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
|
21
|
-
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
22
16
|
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
23
|
-
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
24
17
|
# Help: </ul>
|
25
18
|
|
26
19
|
plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAmplicons,PluginLowQuality
|
@@ -5,14 +5,18 @@
|
|
5
5
|
# Help: <br/>This template is used to preprocess Roche 454 genomic data <br/>
|
6
6
|
|
7
7
|
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
8
9
|
# Help: <ul>
|
9
|
-
# Help: <li>PluginLowHighSize</li>
|
10
|
-
# Help: <li>PluginMids</li>
|
11
|
-
# Help: <li>PluginIndeterminations</li>
|
12
|
-
# Help: <li>PluginAbAdapters</li>
|
13
|
-
|
14
|
-
# Help: <li>
|
15
|
-
|
10
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
|
12
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
13
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
14
|
+
|
15
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
16
|
+
|
17
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
18
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
19
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
16
20
|
# Help: </ul>
|
17
21
|
|
18
|
-
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality
|
22
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -4,4 +4,22 @@
|
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess Roche 454 genomic data including paired-end <br/>
|
6
6
|
|
7
|
-
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
|
12
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
13
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
14
|
+
|
15
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
16
|
+
|
17
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
18
|
+
# Help: <li>PluginLinker: splits Roche 454 paired-end sequences by any linker found in linkers database. Linker is removed.</li>
|
19
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
20
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
21
|
+
# Help: </ul>
|
22
|
+
|
23
|
+
|
24
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginUserContaminants,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
|
25
|
+
|
@@ -3,5 +3,30 @@
|
|
3
3
|
# ==============================================================
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess short reads for genomics <br/>
|
6
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
7
|
+
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
10
|
+
|
11
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
12
|
+
|
13
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
14
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
15
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
16
|
+
# Help: </ul>
|
17
|
+
|
18
|
+
plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality
|
19
|
+
|
20
|
+
generate_initial_stats = false
|
21
|
+
|
22
|
+
# Minimum insert size for every trimmed sequence
|
23
|
+
|
24
|
+
min_insert_size_trimmed = 30
|
25
|
+
min_quality=20
|
26
|
+
min_insert_size_trimmed=40
|
27
|
+
min_insert_size_paired=40
|
28
|
+
|
29
|
+
|
30
|
+
# do not remove cloned sequences
|
31
|
+
remove_clonality=false
|
6
32
|
|
7
|
-
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality
|
@@ -3,5 +3,28 @@
|
|
3
3
|
# ==============================================================
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess short reads for genomics including low complexity removal<br/>
|
6
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
7
|
+
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
10
|
+
|
11
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
12
|
+
|
13
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
14
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
15
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
16
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
17
|
+
# Help: </ul>
|
18
|
+
|
19
|
+
plugin_list = PluginIndeterminations,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
20
|
+
|
21
|
+
generate_initial_stats = false
|
22
|
+
|
23
|
+
# Minimum insert size for every trimmed sequence
|
24
|
+
|
25
|
+
min_insert_size_trimmed = 30
|
26
|
+
|
27
|
+
|
28
|
+
# do not remove cloned sequences
|
29
|
+
remove_clonality=false
|
6
30
|
|
7
|
-
plugin_list = PluginIndeterminations,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# ======================================
|
2
|
+
# General parameters to extract Amplicons
|
3
|
+
# ======================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to remove only bad quality regions<br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
11
|
+
# Help: </ul>
|
12
|
+
|
13
|
+
plugin_list = PluginLowQuality
|
14
|
+
|
15
|
+
# do not remove cloned sequences
|
16
|
+
remove_clonality=false
|
17
|
+
|
18
|
+
# remove amplicons containing less or equal number of sequences indicated
|
19
|
+
|
20
|
+
generate_initial_stats = false
|
21
|
+
|
22
|
+
# Minimum insert size for every trimmed sequence
|
23
|
+
|
24
|
+
min_insert_size_trimmed = 30
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# ======================================
|
2
|
+
# General parameters SANGER
|
3
|
+
# ======================================
|
4
|
+
|
5
|
+
# Help: <br/>This template is used to preprocess Sanger genomic data <br/>
|
6
|
+
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
8
|
+
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
11
|
+
# Help: <li>PluginAdapters: trimming adapters</li>
|
12
|
+
|
13
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
14
|
+
|
15
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
16
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
17
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
18
|
+
# Help: </ul>
|
19
|
+
|
20
|
+
plugin_list = PluginIndeterminations,PluginAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality
|
21
|
+
|
22
|
+
|
23
|
+
# do not remove cloned sequences
|
24
|
+
remove_clonality=false
|
25
|
+
|
@@ -4,7 +4,24 @@
|
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data <br/>
|
6
6
|
|
7
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
7
8
|
|
8
|
-
|
9
|
+
# Help: <ul>
|
10
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
11
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
|
12
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
13
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
14
|
+
# Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
|
15
|
+
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
16
|
+
|
17
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
18
|
+
|
19
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
20
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
21
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
22
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
23
|
+
# Help: </ul>
|
24
|
+
|
25
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
9
26
|
|
10
27
|
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
@@ -3,7 +3,28 @@
|
|
3
3
|
# ======================================
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess Roche 454 transcriptomic data. Customized for plants.<br/>
|
6
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
6
7
|
|
7
|
-
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginLowHighSize: rejecting too short or too long sequences</li>
|
10
|
+
# Help: <li>PluginMids: trimming Roche 454 MIDs and keys</li>
|
11
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
12
|
+
# Help: <li>PluginAbAdapters: trimming the Roche 454 AB adapters</li>
|
13
|
+
# Help: <li>PluginAdapters: trimming the adapters found in SeqTrimNEXT database</li>
|
14
|
+
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
15
|
+
|
16
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
17
|
+
|
18
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
19
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
20
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
21
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
22
|
+
# Help: </ul>
|
23
|
+
|
24
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginAdapters,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
8
25
|
|
9
26
|
contaminants_db="contaminants.fasta cont_ribosome.fasta cont_mitochondrias.fasta cont_plastids.fasta"
|
27
|
+
|
28
|
+
|
29
|
+
# do not remove cloned sequences
|
30
|
+
remove_clonality=false
|
@@ -3,7 +3,29 @@
|
|
3
3
|
# ======================================
|
4
4
|
|
5
5
|
# Help: <br/>This template is used to preprocess short reads for transcriptomics<br/>
|
6
|
+
# Help: <br/><b>Plugin list and aplication order:</b><br/>
|
6
7
|
|
7
|
-
|
8
|
+
# Help: <ul>
|
9
|
+
# Help: <li>PluginIndeterminations: retaining the longest sequence fragment without indeterminations (N)</li>
|
10
|
+
# Help: <li>PluginFindPolyAt: trimming PolyA and PolyT. After a PolyT, the sequence is checked for low complexity. </li>
|
11
|
+
|
12
|
+
# Help: <li>PluginUserContaminants: discarding sequences matching any entry in the user contaminant database saving them in a separate file</li>
|
13
|
+
|
14
|
+
# Help: <li>PluginContaminants: trimming the contaminant fragments found in the contaminant database. When contamination is prevalent, sequences are rejected. </li>
|
15
|
+
# Help: <li>PluginVectors: trimming any cloning vector found in SeqTrimNEXT database. </li>
|
16
|
+
# Help: <li>PluginLowQuality: trimming low quality regions from sequences. </li>
|
17
|
+
# Help: <li>PluginLowComplexity: sequences with low complexity are stored on a separate file. </li>
|
18
|
+
# Help: </ul>
|
19
|
+
|
20
|
+
plugin_list = PluginIndeterminations,PluginFindPolyAt,PluginUserContaminants,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
8
21
|
|
9
22
|
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
23
|
+
|
24
|
+
generate_initial_stats = false
|
25
|
+
|
26
|
+
# Minimum insert size for every trimmed sequence
|
27
|
+
|
28
|
+
min_insert_size_trimmed = 30
|
29
|
+
|
30
|
+
# do not remove cloned sequences
|
31
|
+
remove_clonality=false
|
data/lib/seqtrimnext.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: seqtrimnext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.0.51
|
5
|
+
version: 2.0.52
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero & Almudena Bocinos
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-07-16 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: narray
|
@@ -123,16 +123,27 @@ dependencies:
|
|
123
123
|
type: :runtime
|
124
124
|
version_requirements: *id010
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name: hoe
|
126
|
+
name: scbi_headers
|
127
127
|
prerelease: false
|
128
128
|
requirement: &id011 !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 0.0.2
|
134
|
+
type: :runtime
|
135
|
+
version_requirements: *id011
|
136
|
+
- !ruby/object:Gem::Dependency
|
137
|
+
name: hoe
|
138
|
+
prerelease: false
|
139
|
+
requirement: &id012 !ruby/object:Gem::Requirement
|
129
140
|
none: false
|
130
141
|
requirements:
|
131
142
|
- - ">="
|
132
143
|
- !ruby/object:Gem::Version
|
133
144
|
version: 2.8.0
|
134
145
|
type: :development
|
135
|
-
version_requirements: *id011
|
146
|
+
version_requirements: *id012
|
136
147
|
description: SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of scbi_mapreduce gem to be able to run in parallel and distributed environments. It is specially suited for Roche 454 (normal and paired-end) & Ilumina datasets, although it could be easyly adapted to any other situation.
|
137
148
|
email:
|
138
149
|
- dariogf@gmail.com & alkoke@gmail.com
|
@@ -164,6 +175,8 @@ extensions: []
|
|
164
175
|
extra_rdoc_files:
|
165
176
|
- History.txt
|
166
177
|
- lib/seqtrimnext/templates/amplicons.txt
|
178
|
+
- lib/seqtrimnext/templates/sanger.txt
|
179
|
+
- lib/seqtrimnext/templates/only_quality.txt
|
167
180
|
- lib/seqtrimnext/templates/genomics_454.txt
|
168
181
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
169
182
|
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
@@ -200,7 +213,7 @@ files:
|
|
200
213
|
- lib/seqtrimnext/actions/action_ab_adapter.rb
|
201
214
|
- lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
202
215
|
- lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
203
|
-
- lib/seqtrimnext/actions/action_classify.rb
|
216
|
+
- lib/seqtrimnext/actions/action_user_contaminant.rb
|
204
217
|
- lib/seqtrimnext/actions/action_empty_insert.rb
|
205
218
|
- lib/seqtrimnext/actions/action_ignore_repeated.rb
|
206
219
|
- lib/seqtrimnext/actions/action_indetermination.rb
|
@@ -251,7 +264,6 @@ files:
|
|
251
264
|
- lib/seqtrimnext/plugins/plugin.rb
|
252
265
|
- lib/seqtrimnext/plugins/plugin_ab_adapters.rb
|
253
266
|
- lib/seqtrimnext/plugins/plugin_adapters.rb
|
254
|
-
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
|
255
267
|
- lib/seqtrimnext/plugins/plugin_amplicons.rb
|
256
268
|
- lib/seqtrimnext/plugins/plugin_contaminants.rb
|
257
269
|
- lib/seqtrimnext/plugins/plugin_user_contaminants.rb
|
@@ -265,10 +277,11 @@ files:
|
|
265
277
|
- lib/seqtrimnext/plugins/plugin_low_high_size.rb
|
266
278
|
- lib/seqtrimnext/plugins/plugin_low_quality.rb
|
267
279
|
- lib/seqtrimnext/plugins/plugin_mids.rb
|
268
|
-
- lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
|
269
280
|
- lib/seqtrimnext/plugins/plugin_short_insert.rb
|
270
281
|
- lib/seqtrimnext/plugins/plugin_vectors.rb
|
271
282
|
- lib/seqtrimnext/templates/amplicons.txt
|
283
|
+
- lib/seqtrimnext/templates/sanger.txt
|
284
|
+
- lib/seqtrimnext/templates/only_quality.txt
|
272
285
|
- lib/seqtrimnext/templates/genomics_454.txt
|
273
286
|
- lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
274
287
|
- lib/seqtrimnext/templates/genomics_short_reads.txt
|
@@ -1,165 +0,0 @@
|
|
1
|
-
require "plugin"
|
2
|
-
|
3
|
-
########################################################
|
4
|
-
# Author: Almudena Bocinos Rioboo
|
5
|
-
#
|
6
|
-
# Defines the main methods that are necessary to execute PluginAdapters
|
7
|
-
# Inherit: Plugin
|
8
|
-
########################################################
|
9
|
-
|
10
|
-
class PluginAdaptersOld < Plugin
|
11
|
-
|
12
|
-
def get_type_adapter(p_start,p_end,seq)
|
13
|
-
#if q_beg is nearer the left, add adapter action by the left,
|
14
|
-
#if q_end esta is nearer the right , add adapter action by the right
|
15
|
-
#NOTE: If the adapter is very near from left and rigth,
|
16
|
-
#then the sequence isn't valid, because almost sequence is adapter.
|
17
|
-
|
18
|
-
|
19
|
-
v1= p_end.to_i
|
20
|
-
v2= p_start.to_i
|
21
|
-
|
22
|
-
# puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
|
23
|
-
|
24
|
-
# puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
|
25
|
-
if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
|
26
|
-
type = "ActionLeftAdapter"
|
27
|
-
|
28
|
-
else
|
29
|
-
type = "ActionRightAdapter"
|
30
|
-
|
31
|
-
end
|
32
|
-
return type
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
def cut_by_right(adapter,seq)
|
37
|
-
|
38
|
-
left_size = adapter.q_beg-seq.insert_start+1
|
39
|
-
right_size = seq.insert_end-adapter.q_end+1
|
40
|
-
left_size=0 if (left_size<0)
|
41
|
-
right_size=0 if (right_size<0)
|
42
|
-
|
43
|
-
return (left_size>(right_size/2).to_i)
|
44
|
-
|
45
|
-
end
|
46
|
-
|
47
|
-
#Begins the plugin1's execution to warn that there are contaminants in the sequence "seq"
|
48
|
-
def execute(seqs)
|
49
|
-
blasts= do_blasts(seqs)
|
50
|
-
|
51
|
-
seqs.each_with_index do |s,i|
|
52
|
-
exec_seq(s,blasts.querys[i])
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def do_blasts(seqs)
|
57
|
-
# find MIDS with less results than max_target_seqs value
|
58
|
-
blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
59
|
-
$LOG.debug('BLAST:'+blast.get_blast_cmd)
|
60
|
-
|
61
|
-
fastas=[]
|
62
|
-
|
63
|
-
seqs.each do |seq|
|
64
|
-
fastas.push ">"+seq.seq_name
|
65
|
-
fastas.push seq.seq_fasta
|
66
|
-
end
|
67
|
-
|
68
|
-
# fastas=fastas.join("\n")
|
69
|
-
|
70
|
-
blast_table_results = blast.do_blast(fastas)
|
71
|
-
|
72
|
-
# puts blast_table_results.inspect
|
73
|
-
|
74
|
-
return blast_table_results
|
75
|
-
end
|
76
|
-
|
77
|
-
|
78
|
-
def exec_seq(seq,blast_query)
|
79
|
-
if blast_query.query_id != seq.seq_name
|
80
|
-
raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
|
81
|
-
end
|
82
|
-
|
83
|
-
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
|
84
|
-
|
85
|
-
|
86
|
-
# blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
|
87
|
-
|
88
|
-
# blast with only one sequence, no with many sequences from a database
|
89
|
-
#---------------------------------------------------------------------
|
90
|
-
|
91
|
-
# blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
|
92
|
-
|
93
|
-
#blast_table_results = BlastTableResult.new(res)
|
94
|
-
|
95
|
-
# blast_table_results.inspect
|
96
|
-
|
97
|
-
adapters=[]
|
98
|
-
# blast_table_results.querys.each do |query| # first round to save adapters without overlap
|
99
|
-
merge_hits(blast_query,adapters)
|
100
|
-
# end
|
101
|
-
|
102
|
-
begin
|
103
|
-
adapters2=adapters # second round to save adapters without overlap
|
104
|
-
adapters = []
|
105
|
-
merge_hits(adapters2,adapters)
|
106
|
-
end until (adapters2.count == adapters.count)
|
107
|
-
|
108
|
-
actions=[]
|
109
|
-
adapter_size=0
|
110
|
-
# @stats['adapter_size']={}
|
111
|
-
adapters.each do |ad| # adds the correspondent action to the sequence
|
112
|
-
|
113
|
-
type = get_type_adapter(ad.q_beg,ad.q_end,seq)
|
114
|
-
a = seq.new_action(ad.q_beg,ad.q_end,type)
|
115
|
-
# puts " state left_action #{a.left_action} right_action #{a.right_action}"
|
116
|
-
|
117
|
-
|
118
|
-
adapter_size=ad.q_end-ad.q_beg+1
|
119
|
-
|
120
|
-
if cut_by_right(ad,seq)
|
121
|
-
|
122
|
-
# puts "action right end1 #{seq.insert_end}"
|
123
|
-
|
124
|
-
a.right_action=true #mark rigth action to get the left insert
|
125
|
-
else
|
126
|
-
|
127
|
-
# puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
|
128
|
-
|
129
|
-
a.left_action = true #mark left action to get the right insert
|
130
|
-
|
131
|
-
end
|
132
|
-
|
133
|
-
a.message = ad.subject_id
|
134
|
-
a.reversed = ad.reversed
|
135
|
-
actions.push a
|
136
|
-
|
137
|
-
# @stats[:adapter_size]={adapter_size => 1}
|
138
|
-
add_stats('adapter_size',adapter_size)
|
139
|
-
|
140
|
-
end
|
141
|
-
seq.add_actions(actions)
|
142
|
-
#
|
143
|
-
end
|
144
|
-
|
145
|
-
#Returns an array with the errors due to parameters are missing
|
146
|
-
def self.check_params(params)
|
147
|
-
errors=[]
|
148
|
-
|
149
|
-
comment='Blast E-value used as cut-off when searching for adapters or primers'
|
150
|
-
default_value = 1e-6
|
151
|
-
params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
|
152
|
-
|
153
|
-
comment='Minimum required identity (%) for a reliable adapter'
|
154
|
-
default_value = 95
|
155
|
-
params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
|
156
|
-
|
157
|
-
comment='Path for adapter database'
|
158
|
-
default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
|
159
|
-
params.check_param(errors,'adapters_db','DB',default_value,comment)
|
160
|
-
|
161
|
-
return errors
|
162
|
-
end
|
163
|
-
|
164
|
-
|
165
|
-
end
|