seqtrimnext 2.0.59 → 2.0.60
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -39
- data/bin/seqtrimnext +7 -7
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -1
- data/lib/seqtrimnext/classes/extract_stats.rb +1 -1
- data/lib/seqtrimnext/classes/seqtrim.rb +3 -3
- data/lib/seqtrimnext/plugins/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
- data/lib/seqtrimnext/plugins_old/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins_old/plugin_adapters_old.rb +156 -0
- data/lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb +382 -0
- data/lib/seqtrimnext/plugins_old/plugin_rem_adit_artifacts.rb +234 -0
- data/lib/seqtrimnext/version.rb +4 -0
- data/lib/seqtrimnext.rb +2 -16
- data/seqtrimnext.gemspec +38 -0
- metadata +95 -156
- data/.gemtest +0 -0
- data/History.txt +0 -130
- data/Manifest.txt +0 -125
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_helper.rb +0 -3
- data/test/test_seqtrimnext.rb +0 -11
@@ -0,0 +1,382 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginLowQuality. See the main method called execute.
|
7
|
+
|
8
|
+
#
|
9
|
+
# Inherit: Plugin
|
10
|
+
########################################################
|
11
|
+
|
12
|
+
class PluginLowQuality < Plugin
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
# Builds the sliding-window quality sums for +qual+.
#
# sum[i] holds qual[i] + ... + qual[i + @window - 1] for every window start
# i in ini..index_window_end. Positions below +ini+ are left as nil.
#
# qual             - Array of numeric quality values.
# ini              - Index of the first window start.
# index_window_end - Index of the last window start.
#
# Returns the Array of sums (empty when there is no window to compute).
def create_sum_window(qual,ini,index_window_end)
  sum = []
  # No window fits between ini and index_window_end: nothing to sum.
  return sum if index_window_end < ini

  # The first window is summed explicitly ...
  sum[ini] = (ini...(ini + @window)).inject(0) { |acc, i| acc + qual[i] }

  # ... and every following one is derived from its predecessor by dropping
  # the value that left the window and adding the one that entered it.
  (ini + 1).upto(index_window_end) do |i|
    sum[i] = sum[i - 1] - qual[i - 1] + qual[i + @window - 1]
  end

  sum
end
|
50
|
+
|
51
|
+
# Finds the longest run of consecutive window starts whose window sum
# reaches @cut_off, looking only at positions ini..index_window_end.
# When two runs have the same length the later one wins.
#
# sum              - Array of window sums, indexed by window start.
# ini              - First window start to examine.
# index_window_end - Last window start to examine.
#
# Returns [new_start, new_end]. new_start is -1 when no run was found.
# When ini is already past index_window_end the result is
# [-1, index_window_end]: the non-negative end lets the caller tell
# "ran past the last window" apart from "no high quality found" ([-1, -1]).
def find_bounds_high_quality(sum,ini,index_window_end)
  return [-1, index_window_end] if ini > index_window_end

  best_start = -1
  best_end = -1
  run_start = -1 # start of the run currently being scanned, -1 when outside a run

  ini.upto(index_window_end) do |i|
    if sum[i] >= @cut_off
      run_start = i if run_start < 0 # just entered a run
    elsif run_start >= 0
      # just left a run: keep it if it is at least as long as the best so far
      if (i - 1 - run_start) >= (best_end - best_start)
        best_start, best_end = run_start, i - 1
      end
      run_start = -1
    end
  end

  # A run still open at the end of the scan extends to the last window start.
  if run_start != -1 && (index_window_end - run_start) >= (best_end - best_start)
    best_start, best_end = run_start, index_window_end
  end

  [best_start, best_end]
end
|
107
|
+
|
108
|
+
# Tightens the bounds of a short high-quality region.
#
# The start moves forward to the first value >= @low found within one
# window after new_start (or by a whole window when there is none).
# The end is new_end plus the offset of the last value >= @low found
# within one window after new_end (or new_end - 1 when there is none).
#
# qual      - Array of numeric quality values.
# new_start - Start window index of the region.
# new_end   - End window index of the region.
#
# Returns [new_start, new_end] adjusted.
def cut_fine_bounds_short(qual,new_start,new_end)
  head_offset = (0...@window).find { |i| qual[new_start + i] >= @low } || @window
  tail_offset = (@window - 1).downto(0).find { |i| qual[new_end + i] >= @low } || -1

  [new_start + head_offset, new_end + tail_offset]
end
|
135
|
+
|
136
|
+
|
137
|
+
# cuts fine the high quality bounds
|
138
|
+
# Tightens the bounds of a high-quality region that is longer than a window.
#
# Start: the first window is scanned from its last position backwards; the
# scan stops at the first value < @low met after at least one value >= @low
# has been seen, and the start moves just past that position.
# End: the last window is scanned forwards with the same rule, and the end
# moves to the position just before the stopping point.
#
# The "seen a good value" flags are real booleans. They used to be the
# integers 0/1, and 0 is truthy in Ruby, so the scan stopped at the very
# first low value even when no good value had been seen yet.
#
# qual      - Array of numeric quality values.
# new_start - Start window index of the region.
# new_end   - End window index of the region.
#
# Returns [new_start, new_end] adjusted.
def cut_fine_bounds(qual,new_start,new_end)
  # Returns the first offset holding a low value that follows at least one
  # good value, or nil when the scan reaches the end of the offsets.
  first_low_after_ok = lambda do |base, offsets|
    seen_ok = false
    offsets.find do |i|
      if qual[base + i] < @low
        seen_ok
      else
        seen_ok = true
        false
      end
    end
  end

  head_stop = first_low_after_ok.call(new_start, (@window - 1).downto(0))
  new_start += head_stop + 1 if head_stop

  tail_stop = first_low_after_ok.call(new_end, (0...@window))
  new_end += (tail_stop || @window) - 1

  [new_start, new_end]
end
|
173
|
+
|
174
|
+
# Looks for the next high-quality region of +qual+ starting at +ini+.
#
# The window sums (@sum) and the last window start (@index_window_end) are
# computed only when ini == 0 and reused on later calls, so the first call
# for a sequence must use ini == 0.
#
# qual - Array of numeric quality values.
# ini  - Index where the search starts (default 0).
#
# Returns [new_start, new_end] as produced by find_bounds_high_quality and,
# when a region was found (new_start >= 0), refined by cut_fine_bounds_short
# or cut_fine_bounds.
def find_high_quality(qual,ini=0)
  if ini == 0
    # Do not subtract 1 here, or the last nucleotide of the sequence is lost.
    # TODO: in Juan's seqtrim, iwe (called index_window_end here) loses
    # 2 nucleotides of the last computed window.
    @index_window_end = qual.size - @window
    @sum = create_sum_window(qual, ini, @index_window_end)
  end

  new_start, new_end = find_bounds_high_quality(@sum, ini, @index_window_end)

  if new_start >= 0
    new_start, new_end =
      if new_start + @window >= new_end
        cut_fine_bounds_short(qual, new_start, new_end)
      else
        cut_fine_bounds(qual, new_start, new_end)
      end
  end

  [new_start, new_end]
end
|
218
|
+
|
219
|
+
|
220
|
+
# Adds an "ActionLowQuality" action covering start..p_begin-1, the stretch
# that precedes a high-quality region, when that stretch is at least half a
# window long.
#
# The stretch length is p_begin - start. It used to be computed as
# p_begin - 1, which ignored +start+ and disagreed with the size check made
# by exec_seq before calling this method.
#
# p_begin - Start of the high-quality region.
# p_end   - End of the high-quality region (not used here).
# actions - Array that receives the new action.
# seq     - Sequence object; must respond to new_action(start, end, type).
# start   - First position of the low-quality stretch.
def add_action_before_high_qual(p_begin,p_end,actions,seq,start)
  action_size = p_begin - start
  return if action_size < (@window / 2)
  return unless p_begin > 0 && action_size > 0

  actions.push seq.new_action(start, p_begin - 1, "ActionLowQuality")
end
|
236
|
+
|
237
|
+
# Adds an "ActionLowQuality" action for the stretch that follows a
# high-quality region, when that stretch (seq.insert_end - p_end) is at
# least half a window long and p_end is not the last position of the
# sequence.
#
# The action runs from p_end - seq.insert_start + 1 to the last index of
# seq.seq_fasta.
# NOTE(review): the start is shifted by insert_start but the end is not;
# confirm the coordinate system expected by seq.new_action.
#
# p_begin - Start of the high-quality region (not used here).
# p_end   - End of the high-quality region.
# actions - Array that receives the new action.
# seq     - Sequence object; must respond to insert_start, insert_end,
#           seq_fasta and new_action(start, end, type).
def add_action_after_high_qual(p_begin,p_end,actions,seq)
  action_size = seq.insert_end - p_end
  return if action_size < (@window / 2)

  last_index = seq.seq_fasta.size - 1
  return unless p_end < last_index && action_size > 0

  actions.push seq.new_action(p_end - seq.insert_start + 1, last_index, "ActionLowQuality")
end
|
253
|
+
|
254
|
+
|
255
|
+
|
256
|
+
|
257
|
+
|
258
|
+
######################################################################
|
259
|
+
#---------------------------------------------------------------------
|
260
|
+
|
261
|
+
# Begins the plugin's execution with the sequence "seq"
|
262
|
+
# Creates an action by each subsequence with low quality to eliminate it
|
263
|
+
# A subsequence has low quality if the sum of all its qualities < subsequence_size*20
|
264
|
+
# Creates the qualities windows from the sequence, looks for the subsequence with high quality
|
265
|
+
# and mark, with an action, the before part to the High Quality Subsequence like a low quality part
|
266
|
+
# Finally mark, with an action, the after part to the High Quality Subsequence like a low quality part
|
267
|
+
#-----------------------------------------------------------------
|
268
|
+
|
269
|
+
# Marks the low-quality stretches of +seq+ with "ActionLowQuality" actions.
#
# Reads 'min_quality' and 'window_width' from @params, clamps the window to
# the sequence length, then repeatedly asks find_high_quality for the next
# high-quality region. Each low-quality stretch found before a region is
# turned into an action (when long enough) and its length is recorded with
# add_stats('low_qual', ...). The collected actions are handed to
# seq.add_actions.
#
# When the sequence has no qualities, an error is logged only if this is
# exactly PluginLowQuality; any other class skips the sequence silently.
# The qualities are nil-checked before calling #size so that case no longer
# raises NoMethodError.
#
# seq         - Sequence object (seq_qual, seq_fasta, seq_name, insert_start,
#               new_action and add_actions are used).
# blast_query - Not used by this plugin.
def exec_seq(seq,blast_query)
  if (self.class.to_s == 'PluginLowQuality') && seq.seq_qual.nil?
    $LOG.error " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
  elsif seq.seq_qual && seq.seq_qual.size > 0
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"

    @low = @params.get_param('min_quality').to_i

    # The window can never be wider than the sequence itself.
    @window = [@params.get_param('window_width').to_i, seq.seq_fasta.length].min
    @cut_off = @window * @low

    actions = []

    p_begin, p_end = 0, -1 # positions of the high quality bounds
    p_end_old = nil

    while p_begin >= 0 && (p_end + 1 < seq.seq_qual.size)
      p_end_old = p_end
      p_begin, p_end = find_high_quality(seq.seq_qual, p_end + 1)

      # A high quality part was found and the low quality stretch before it
      # is long enough: create an action for that stretch.
      if p_begin > 0 && (p_begin - p_end_old - 1 >= @window / 2)
        add_action_before_high_qual(p_begin, p_end, actions, seq, p_end_old + 1)
        add_stats('low_qual', p_begin - p_end_old - 1)
      end
    end

    # NOTE(review): this condition is the loop condition above, and nothing
    # changes between the loop exit and this test, so the branch appears
    # unreachable. Kept as is; confirm the intended handling of the stretch
    # that follows the last high quality part.
    if p_begin >= 0 && (p_end + 1 < seq.seq_qual.size)
      add_action_after_high_qual(p_begin, p_end, actions, seq)
      add_stats('low_qual', seq.seq_fasta.size - 1 - p_end - seq.insert_start - 1 + 1)
    end

    # No further high quality part: everything after the previous one is low quality.
    if p_end < 0 && p_end_old
      a = seq.new_action(p_end_old + 1, seq.seq_fasta.size - 1, "ActionLowQuality")
      add_stats('low_qual', seq.seq_fasta.size - 1 - p_end_old - 1 + 1)
      actions.push a
    end

    seq.add_actions(actions)
  end
end
|
354
|
+
|
355
|
+
#-----------------------------------------------------------------
|
356
|
+
|
357
|
+
|
358
|
+
######################################################################
|
359
|
+
#---------------------------------------------------------------------
|
360
|
+
|
361
|
+
# Returns an array with the errors caused by missing parameters
|
362
|
+
# Checks the parameters this plugin reads, registering each one with its
# type, default value and description through params.check_param.
#
# params - Parameter store; must respond to
#          check_param(errors, name, type, default_value, comment).
#
# Returns the Array of errors passed to (and possibly filled by) check_param.
def self.check_params(params)
  errors = []

  [
    ['min_quality', 20, 'Minimum quality value for every nucleotide'],
    ['window_width', 15, 'Quality window for scanning low quality segments']
  ].each do |name, default_value, comment|
    params.check_param(errors, name, 'Integer', default_value, comment)
  end

  errors
end
|
378
|
+
|
379
|
+
|
380
|
+
private :find_high_quality
|
381
|
+
|
382
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
|
4
|
+
########################################################
|
5
|
+
# Author: Almudena Bocinos Rioboo
|
6
|
+
#
|
7
|
+
# Defines the main methods that are necessary to execute PluginRemAditArtifacts
|
8
|
+
|
9
|
+
#
|
10
|
+
# Inherit: Plugin
|
11
|
+
########################################################
|
12
|
+
|
13
|
+
class PluginRemAditArtifacts < Plugin
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
def exec_seq(seq,blast_query)
|
18
|
+
|
19
|
+
$LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
|
20
|
+
seq2 = seq.seq_fasta
|
21
|
+
first = 0
|
22
|
+
last = seq2.size-1
|
23
|
+
old_first=first
|
24
|
+
old_last=last
|
25
|
+
|
26
|
+
|
27
|
+
while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
|
28
|
+
first += 6
|
29
|
+
seq2.slice!(0..5)
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
while (seq2 =~ /(GCGGGG|CCCCGC)$/i)
|
34
|
+
last -= 6
|
35
|
+
seq2.slice!(seq2.size-1-5..seq2.size-1)
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
#is_forward, is_cDNA,
|
41
|
+
#TrimExtremeNXs(first,last)
|
42
|
+
is_forward = @params.get_param('is_forward')=='true'
|
43
|
+
is_cDNA = @params.get_param('is_cDNA')=='true'
|
44
|
+
|
45
|
+
previous_first,previous_last =0,0
|
46
|
+
|
47
|
+
until ((previous_first == first) && (previous_last == last))
|
48
|
+
previous_first,previous_last = first, last
|
49
|
+
|
50
|
+
if (is_cDNA)
|
51
|
+
if (is_forward)
|
52
|
+
|
53
|
+
nTs = 0
|
54
|
+
nTs = $1.length if (seq2 =~ /^(T+)/i)
|
55
|
+
|
56
|
+
if (nTs > 3)
|
57
|
+
seq2.slice!(0..nTs -1)
|
58
|
+
first += nTs #-1
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
nAs = 0
|
63
|
+
nAs = $1.length if (seq2 =~ /(A+)$/i)
|
64
|
+
|
65
|
+
if (nAs > 3)
|
66
|
+
seq2.slice!(seq2.size - nAs..seq2.size - 1)
|
67
|
+
last -= nAs
|
68
|
+
|
69
|
+
end
|
70
|
+
else #si es backward
|
71
|
+
|
72
|
+
nTs = 0
|
73
|
+
nTs = $1.length if (seq2 =~ /(T+)$/i)
|
74
|
+
|
75
|
+
if (nTs > 3)
|
76
|
+
seq2.slice!(seq2.size-nTs..seq2.size-1)
|
77
|
+
last -= nTs
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
nAs = 0
|
82
|
+
nAs = $1.length if (seq2 =~ /^(A+)/i)
|
83
|
+
|
84
|
+
if (nAs > 3)
|
85
|
+
seq2.slice!(0..nAs -1)
|
86
|
+
first += nAs
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
if (((first>=0) && (first>old_first)) || ((last>=0) && (last<old_last)))
|
95
|
+
type='ActionRemAditArtifacts'
|
96
|
+
actions = []
|
97
|
+
# seq.add_action(first,last,type)
|
98
|
+
a=seq.new_action(first,last,type)
|
99
|
+
actions.push a
|
100
|
+
seq.add_actions(actions)
|
101
|
+
end
|
102
|
+
|
103
|
+
|
104
|
+
end
|
105
|
+
######################################################################
|
106
|
+
#---------------------------------------------------------------------
|
107
|
+
def execute_old(seq)
|
108
|
+
seq2 = seq.seq_fasta
|
109
|
+
#seq2 = 'dGCGGGG'
|
110
|
+
first = 0
|
111
|
+
last = seq2.size-1
|
112
|
+
old_first=first
|
113
|
+
old_last=last
|
114
|
+
|
115
|
+
# puts '1 '+seq2
|
116
|
+
# puts 'POS '+first.to_s
|
117
|
+
# puts 'POS '+last.to_s
|
118
|
+
while (seq2 =~ /^(GCGGGG|CCCCGC)/i)
|
119
|
+
first += 6
|
120
|
+
seq2.slice!(0..5)
|
121
|
+
# puts '2 '+seq2
|
122
|
+
# already = true
|
123
|
+
end
|
124
|
+
|
125
|
+
|
126
|
+
while (seq2 =~ /(GCGGGG|CCCCGC)$/i)
|
127
|
+
last -= 6
|
128
|
+
seq2.slice!(seq2.size-1-5..seq2.size-1)
|
129
|
+
# puts '3 '+seq2
|
130
|
+
# already = true
|
131
|
+
end
|
132
|
+
|
133
|
+
|
134
|
+
#is_forward, is_cDNA,
|
135
|
+
#TrimExtremeNXs(first,last)
|
136
|
+
is_forward = @params.get_param('is_forward')
|
137
|
+
is_cDNA = @params.get_param('is_cDNA')
|
138
|
+
# puts '4 '+seq2
|
139
|
+
previous_first,previous_last =0,0
|
140
|
+
|
141
|
+
until ((previous_first == first) && (previous_last == last))
|
142
|
+
previous_first,previous_last = first, last
|
143
|
+
# puts 'POS5-F '+first.to_s
|
144
|
+
# puts 'POS5-L '+last.to_s
|
145
|
+
|
146
|
+
if (is_cDNA)
|
147
|
+
if (is_forward)
|
148
|
+
# puts '5 '+seq2
|
149
|
+
nTs = 0
|
150
|
+
nTs = $1.length if (seq2 =~ /^(T+)/i)
|
151
|
+
if (nTs > 3)
|
152
|
+
seq2.slice!(0..nTs -1)
|
153
|
+
# puts '6 '+seq2
|
154
|
+
first += nTs #-1
|
155
|
+
# puts 'POS6-F '+first.to_s
|
156
|
+
end
|
157
|
+
nAs = 0
|
158
|
+
nAs = $1.length if (seq2 =~ /(A+)$/i)
|
159
|
+
# puts '6-7 '+seq2 + nAs.to_s
|
160
|
+
if (nAs > 3)
|
161
|
+
# puts '7 '+seq2
|
162
|
+
seq2.slice!(seq2.size - nAs..seq2.size - 1)
|
163
|
+
last -= nAs#seq2.size-nAs-2
|
164
|
+
# puts 'POS7-L '+last.to_s
|
165
|
+
end
|
166
|
+
else #si es backward
|
167
|
+
# puts '5b '+seq2
|
168
|
+
nTs = 0
|
169
|
+
nTs = $1.length if (seq2 =~ /(T+)$/i)
|
170
|
+
if (nTs > 3)
|
171
|
+
# puts '6b '+seq2
|
172
|
+
seq2.slice!(seq2.size-nTs..seq2.size-1)
|
173
|
+
last -= nTs#seq2.size-nTs -2
|
174
|
+
# puts 'POS6b-L '+last.to_s
|
175
|
+
end
|
176
|
+
|
177
|
+
nAs = 0
|
178
|
+
nAs = $1.length if (seq2 =~ /^(A+)/i)
|
179
|
+
if (nAs > 3)
|
180
|
+
# puts '7b '+seq2
|
181
|
+
seq2.slice!(0..nAs -1)
|
182
|
+
first += nAs#nAs -1
|
183
|
+
# puts 'POS7b-f '+first.to_s
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
#first -= 1 if (old_first!= first)
|
190
|
+
#last += 1 if (old_last!= last)
|
191
|
+
|
192
|
+
# puts 'POS7-8 '+first.to_s
|
193
|
+
# puts 'POS7-8 '+last.to_s
|
194
|
+
|
195
|
+
if (((first>=0) && (first>old_first)) || ((last>=0) && (last<old_last)))
|
196
|
+
type='ActionRemAditArtifacts'
|
197
|
+
|
198
|
+
# puts '8 '+seq2
|
199
|
+
seq.add_action(first,last,type)
|
200
|
+
end
|
201
|
+
# puts '9 '+seq2
|
202
|
+
|
203
|
+
end
|
204
|
+
|
205
|
+
|
206
|
+
|
207
|
+
######################################################################
|
208
|
+
#---------------------------------------------------------------------
|
209
|
+
|
210
|
+
# Returns an array with the errors caused by missing parameters
|
211
|
+
# This plugin currently validates no parameters, so the list of errors is
# always empty.
#
# params - Parameter store (not consulted).
#
# Returns an empty Array.
def self.check_params(params)
  # The former checks for the 'ta' and 'poly_at_length' params are disabled:
  #   errors.push " The param <ta> doesn't exist" if !params.exists?('ta')
  #   errors.push " The param <poly_at_length> doesn't exist" if !params.exists?('poly_at_length')
  []
end
|
228
|
+
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
end
|
data/lib/seqtrimnext.rb
CHANGED
@@ -1,8 +1,4 @@
|
|
1
|
-
|
2
|
-
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
-
|
4
|
-
|
5
|
-
# setup REQUIRE_PATH
|
1
|
+
require "seqtrimnext/version"
|
6
2
|
|
7
3
|
ROOT_PATH=File.join(File.dirname(__FILE__),'seqtrimnext')
|
8
4
|
|
@@ -22,16 +18,6 @@ $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
|
|
22
18
|
|
23
19
|
$: << File.expand_path(File.join(ROOT_PATH, 'latex','classes'))
|
24
20
|
|
25
|
-
# puts $:
|
26
|
-
|
27
21
|
module Seqtrimnext
|
28
|
-
|
29
|
-
# SEQTRIM_VERSION_REVISION=27
|
30
|
-
# SEQTRIM_VERSION_STAGE = 'b'
|
31
|
-
# SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
|
32
|
-
|
33
|
-
VERSION = '2.0.59'
|
34
|
-
|
35
|
-
SEQTRIM_VERSION = VERSION
|
36
|
-
|
22
|
+
# Your code goes here...
|
37
23
|
end
|
data/seqtrimnext.gemspec
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'seqtrimnext/version'
|
5
|
+
|
6
|
+
# Gem definition for seqtrimnext: metadata, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name          = "seqtrimnext"
  spec.version       = Seqtrimnext::VERSION
  spec.authors       = ["Dario Guerrero", "Almudena Bocinos"]
  # One address per author. The second string used to be split over two
  # lines with a stray "t", so the published address contained a newline.
  spec.email         = ["dariogf@gmail.com", "alkoke@gmail.com"]
  spec.summary       = %q{Sequences preprocessing and cleaning software}
  spec.description   = %q{Seqtrimnext is a plugin based system to preprocess and clean sequences from multiple NGS sequencing platforms}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables are the files under bin/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  spec.add_runtime_dependency 'narray','>=0'
  spec.add_runtime_dependency 'gnuplot','>=0'
  spec.add_runtime_dependency 'term-ansicolor','>=1.0.5'
  spec.add_runtime_dependency 'xml-simple','>=1.0.12'
  spec.add_runtime_dependency 'scbi_blast','>=0.0.34'
  spec.add_runtime_dependency 'scbi_mapreduce','>=0.0.38'
  spec.add_runtime_dependency 'scbi_fasta','>=0.1.7'
  spec.add_runtime_dependency 'scbi_fastq','>=0.0.18'
  spec.add_runtime_dependency 'scbi_plot','>=0.0.6'
  spec.add_runtime_dependency 'scbi_math','>=0.0.1'
  spec.add_runtime_dependency 'scbi_headers','>=0.0.2'
end
|