seqtrimnext 2.0.51 → 2.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/Manifest.txt +3 -3
- data/README.rdoc +18 -3
- data/Rakefile +2 -1
- data/bin/parse_params.rb +5 -1
- data/bin/seqtrimnext +53 -21
- data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
- data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
- data/lib/seqtrimnext/classes/params.rb +109 -123
- data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
- data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
- data/lib/seqtrimnext/classes/sequence.rb +2 -2
- data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
- data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
- data/lib/seqtrimnext/plugins/plugin.rb +42 -12
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
- data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
- data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
- data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
- data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
- data/lib/seqtrimnext/templates/amplicons.txt +1 -8
- data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
- data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
- data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
- data/lib/seqtrimnext/templates/only_quality.txt +24 -0
- data/lib/seqtrimnext/templates/sanger.txt +25 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
- data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
- data/lib/seqtrimnext.rb +1 -1
- metadata +20 -7
- data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -1,245 +0,0 @@
|
|
1
|
-
require "plugin"
|
2
|
-
|
3
|
-
|
4
|
-
########################################################
|
5
|
-
# Author: Almudena Bocinos Rioboo
|
6
|
-
#
|
7
|
-
# Defines the main methods that are necessary to execute PluginRemAditArtifacts
|
8
|
-
|
9
|
-
#
|
10
|
-
# Inherit: Plugin
|
11
|
-
########################################################
|
12
|
-
|
13
|
-
class PluginRemAditArtifacts < Plugin
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# Begins the plugin_low_high_size's execution with the sequence "seq"
|
18
|
-
# Returns a list with start of polyA or polyT seq or 0 if not found
|
19
|
-
# start of a possible second polyAT that was found in the second search, since it looks for both
|
20
|
-
# Uses the param polyA_length to look for at least that number of contiguous A's
|
21
|
-
def execute(seqs)
|
22
|
-
seqs.each do |s|
|
23
|
-
exec_seq(s)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
def exec_seq(seq)
|
29
|
-
|
30
|
-
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
|
31
|
-
seq2 = seq.seq_fasta
|
32
|
-
first = 0
|
33
|
-
last = seq2.size-1
|
34
|
-
old_first=first
|
35
|
-
old_last=last
|
36
|
-
|
37
|
-
|
38
|
-
while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
|
39
|
-
first += 6
|
40
|
-
seq2.slice!(0..5)
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
while (seq2 =~ /(GCGGGG|CCCCGC)$/i)
|
45
|
-
last -= 6
|
46
|
-
seq2.slice!(seq2.size-1-5..seq2.size-1)
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
|
51
|
-
#is_forward, is_cDNA,
|
52
|
-
#TrimExtremeNXs(first,last)
|
53
|
-
is_forward = @params.get_param('is_forward')=='true'
|
54
|
-
is_cDNA = @params.get_param('is_cDNA')=='true'
|
55
|
-
|
56
|
-
previous_first,previous_last =0,0
|
57
|
-
|
58
|
-
until ((previous_first == first) && (previous_last == last))
|
59
|
-
previous_first,previous_last = first, last
|
60
|
-
|
61
|
-
if (is_cDNA)
|
62
|
-
if (is_forward)
|
63
|
-
|
64
|
-
nTs = 0
|
65
|
-
nTs = $1.length if (seq2 =~ /^(T+)/i)
|
66
|
-
|
67
|
-
if (nTs > 3)
|
68
|
-
seq2.slice!(0..nTs -1)
|
69
|
-
first += nTs #-1
|
70
|
-
|
71
|
-
end
|
72
|
-
|
73
|
-
nAs = 0
|
74
|
-
nAs = $1.length if (seq2 =~ /(A+)$/i)
|
75
|
-
|
76
|
-
if (nAs > 3)
|
77
|
-
seq2.slice!(seq2.size - nAs..seq2.size - 1)
|
78
|
-
last -= nAs
|
79
|
-
|
80
|
-
end
|
81
|
-
else #si es backward
|
82
|
-
|
83
|
-
nTs = 0
|
84
|
-
nTs = $1.length if (seq2 =~ /(T+)$/i)
|
85
|
-
|
86
|
-
if (nTs > 3)
|
87
|
-
seq2.slice!(seq2.size-nTs..seq2.size-1)
|
88
|
-
last -= nTs
|
89
|
-
|
90
|
-
end
|
91
|
-
|
92
|
-
nAs = 0
|
93
|
-
nAs = $1.length if (seq2 =~ /^(A+)/i)
|
94
|
-
|
95
|
-
if (nAs > 3)
|
96
|
-
seq2.slice!(0..nAs -1)
|
97
|
-
first += nAs
|
98
|
-
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
|
105
|
-
if (((first>=0) && (first>old_first)) || ((last>=0) && (last<old_last)))
|
106
|
-
type='ActionRemAditArtifacts'
|
107
|
-
actions = []
|
108
|
-
# seq.add_action(first,last,type)
|
109
|
-
a=seq.new_action(first,last,type)
|
110
|
-
actions.push a
|
111
|
-
seq.add_actions(actions)
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
end
|
116
|
-
######################################################################
|
117
|
-
#---------------------------------------------------------------------
|
118
|
-
def execute_old(seq)
|
119
|
-
seq2 = seq.seq_fasta
|
120
|
-
#seq2 = 'dGCGGGG'
|
121
|
-
first = 0
|
122
|
-
last = seq2.size-1
|
123
|
-
old_first=first
|
124
|
-
old_last=last
|
125
|
-
|
126
|
-
# puts '1 '+seq2
|
127
|
-
# puts 'POS '+first.to_s
|
128
|
-
# puts 'POS '+last.to_s
|
129
|
-
while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
|
130
|
-
first += 6
|
131
|
-
seq2.slice!(0..5)
|
132
|
-
# puts '2 '+seq2
|
133
|
-
# already = true
|
134
|
-
end
|
135
|
-
|
136
|
-
|
137
|
-
while (seq2 =~ /(GCGGGG|CCCCGC)$/i)
|
138
|
-
last -= 6
|
139
|
-
seq2.slice!(seq2.size-1-5..seq2.size-1)
|
140
|
-
# puts '3 '+seq2
|
141
|
-
# already = true
|
142
|
-
end
|
143
|
-
|
144
|
-
|
145
|
-
#is_forward, is_cDNA,
|
146
|
-
#TrimExtremeNXs(first,last)
|
147
|
-
is_forward = @params.get_param('is_forward')
|
148
|
-
is_cDNA = @params.get_param('is_cDNA')
|
149
|
-
# puts '4 '+seq2
|
150
|
-
previous_first,previous_last =0,0
|
151
|
-
|
152
|
-
until ((previous_first == first) && (previous_last == last))
|
153
|
-
previous_first,previous_last = first, last
|
154
|
-
# puts 'POS5-F '+first.to_s
|
155
|
-
# puts 'POS5-L '+last.to_s
|
156
|
-
|
157
|
-
if (is_cDNA)
|
158
|
-
if (is_forward)
|
159
|
-
# puts '5 '+seq2
|
160
|
-
nTs = 0
|
161
|
-
nTs = $1.length if (seq2 =~ /^(T+)/i)
|
162
|
-
if (nTs > 3)
|
163
|
-
seq2.slice!(0..nTs -1)
|
164
|
-
# puts '6 '+seq2
|
165
|
-
first += nTs #-1
|
166
|
-
# puts 'POS6-F '+first.to_s
|
167
|
-
end
|
168
|
-
nAs = 0
|
169
|
-
nAs = $1.length if (seq2 =~ /(A+)$/i)
|
170
|
-
# puts '6-7 '+seq2 + nAs.to_s
|
171
|
-
if (nAs > 3)
|
172
|
-
# puts '7 '+seq2
|
173
|
-
seq2.slice!(seq2.size - nAs..seq2.size - 1)
|
174
|
-
last -= nAs#seq2.size-nAs-2
|
175
|
-
# puts 'POS7-L '+last.to_s
|
176
|
-
end
|
177
|
-
else #si es backward
|
178
|
-
# puts '5b '+seq2
|
179
|
-
nTs = 0
|
180
|
-
nTs = $1.length if (seq2 =~ /(T+)$/i)
|
181
|
-
if (nTs > 3)
|
182
|
-
# puts '6b '+seq2
|
183
|
-
seq2.slice!(seq2.size-nTs..seq2.size-1)
|
184
|
-
last -= nTs#seq2.size-nTs -2
|
185
|
-
# puts 'POS6b-L '+last.to_s
|
186
|
-
end
|
187
|
-
|
188
|
-
nAs = 0
|
189
|
-
nAs = $1.length if (seq2 =~ /^(A+)/i)
|
190
|
-
if (nAs > 3)
|
191
|
-
# puts '7b '+seq2
|
192
|
-
seq2.slice!(0..nAs -1)
|
193
|
-
first += nAs#nAs -1
|
194
|
-
# puts 'POS7b-f '+first.to_s
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
#first -= 1 if (old_first!= first)
|
201
|
-
#last += 1 if (old_last!= last)
|
202
|
-
|
203
|
-
# puts 'POS7-8 '+first.to_s
|
204
|
-
# puts 'POS7-8 '+last.to_s
|
205
|
-
|
206
|
-
if (((first>=0) && (first>old_first)) || ((last>=0) && (last<old_last)))
|
207
|
-
type='ActionRemAditArtifacts'
|
208
|
-
|
209
|
-
# puts '8 '+seq2
|
210
|
-
seq.add_action(first,last,type)
|
211
|
-
end
|
212
|
-
# puts '9 '+seq2
|
213
|
-
|
214
|
-
end
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
######################################################################
|
219
|
-
#---------------------------------------------------------------------
|
220
|
-
|
221
|
-
#Returns an array with the errors caused by missing parameters
|
222
|
-
def self.check_params(params)
|
223
|
-
errors=[]
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
# if !params.exists?('ta')
|
228
|
-
# errors.push " The param <ta> doesn't exist"
|
229
|
-
# end
|
230
|
-
|
231
|
-
# if !params.exists?('poly_at_length')
|
232
|
-
# errors.push " The param <poly_at_length> doesn't exist"
|
233
|
-
# end
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
return errors
|
238
|
-
end
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
end
|