seqtrimnext 2.0.59 → 2.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -39
- data/bin/seqtrimnext +7 -7
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +1 -1
- data/lib/seqtrimnext/classes/extract_stats.rb +1 -1
- data/lib/seqtrimnext/classes/seqtrim.rb +3 -3
- data/lib/seqtrimnext/plugins/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
- data/lib/seqtrimnext/plugins_old/.DS_Store +0 -0
- data/lib/seqtrimnext/plugins_old/plugin_adapters_old.rb +156 -0
- data/lib/seqtrimnext/plugins_old/plugin_low_quality_old.rb +382 -0
- data/lib/seqtrimnext/plugins_old/plugin_rem_adit_artifacts.rb +234 -0
- data/lib/seqtrimnext/version.rb +4 -0
- data/lib/seqtrimnext.rb +2 -16
- data/seqtrimnext.gemspec +38 -0
- metadata +95 -156
- data/.gemtest +0 -0
- data/History.txt +0 -130
- data/Manifest.txt +0 -125
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_helper.rb +0 -3
- data/test/test_seqtrimnext.rb +0 -11
@@ -0,0 +1,382 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the main methods that are necessary to execute PluginLowQuality. See the main method called execute.
|
7
|
+
|
8
|
+
#
|
9
|
+
# Inherit: Plugin
|
10
|
+
########################################################
|
11
|
+
|
12
|
+
class PluginLowQuality < Plugin
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
# Builds the sliding-window quality sums for +qual+.
#
# For every window start position p in ini..index_window_end the returned
# array holds, at index p, the sum of the @window quality values
# qual[p] .. qual[p + @window - 1]. Indexes below +ini+ are left as nil.
#
# qual             - array of per-base quality values
# ini              - first window start position
# index_window_end - last window start position (inclusive)
#
# Returns the array of window sums.
def create_sum_window(qual, ini, index_window_end)
  sums = []
  (ini..index_window_end).each { |pos| sums[pos] = 0 }

  # The first window is summed directly.
  (ini...(ini + @window)).each { |pos| sums[ini] += qual[pos] }

  # Each following window is derived from the previous one: subtract the base
  # that left the window and add the base that entered it.
  ((ini + 1)..index_window_end).each do |pos|
    sums[pos] = sums[pos - 1] - qual[pos - 1] + qual[pos + @window - 1]
  end

  sums
end
|
50
|
+
|
51
|
+
# Finds the longest run of consecutive window positions whose window sum
# reaches @cut_off.
#
# sum              - window sums indexed by window start position
# ini              - first window position to inspect
# index_window_end - last window position to inspect (inclusive)
#
# Returns [new_start, new_end] of the longest run; on ties the later run wins
# (comparisons use >=). Returns [-1, -1] when no position reaches the cut-off.
# When +ini+ is already past +index_window_end+ the result is
# [-1, index_window_end], so the caller can tell "scanned past the last
# window" apart from "no high quality found".
def find_bounds_high_quality(sum, ini, index_window_end)
  best_start = -1
  best_end = ini > index_window_end ? index_window_end : -1
  run_start = -1 # start of the run currently being scanned, -1 when outside a run

  (ini..index_window_end).each do |pos|
    if sum[pos] >= @cut_off
      run_start = pos if run_start < 0 # just entered a run
    elsif run_start >= 0
      # Just left a run: keep it if it is at least as long as the best so far.
      if (pos - 1 - run_start) >= (best_end - best_start)
        best_start = run_start
        best_end = pos - 1
      end
      run_start = -1
    end
  end

  # A run still open at the end of the scan extends to the last window.
  if run_start != -1 && (index_window_end - run_start) >= (best_end - best_start)
    best_start = run_start
    best_end = index_window_end
  end

  [best_start, best_end]
end
|
107
|
+
|
108
|
+
# Refines the bounds of a short high-quality region.
#
# The start is moved forward to the first base, within one window length of
# new_start, whose quality is at least @low (by a whole window if there is
# none). The end is set by scanning offsets @window-1 down to 0 from new_end
# and stopping at the first base whose quality is at least @low (new_end - 1
# if there is none).
#
# Returns [new_start, new_end].
def cut_fine_bounds_short(qual, new_start, new_end)
  head = 0
  head += 1 until head >= @window || qual[new_start + head] >= @low

  tail = @window - 1
  tail -= 1 until tail < 0 || qual[new_end + tail] >= @low

  [new_start + head, new_end + tail]
end
|
135
|
+
|
136
|
+
|
137
|
+
# cuts fine the high quality bounds
|
138
|
+
# Cuts the high-quality bounds fine.
#
# Start: the first window (new_start .. new_start+@window-1) is scanned from
# its last base backwards. Low-quality bases (< @low) are skipped until at
# least one good base has been seen; the scan then stops at the next
# low-quality base and the start is placed just after it.
#
# End: the last window (new_end .. new_end+@window-1) is scanned forwards with
# the same rule, and the end is placed just before the low-quality base that
# stopped the scan.
#
# The "good base seen" flags are real booleans. They used to be 0/1 integers,
# and because 0 is truthy in Ruby the scan stopped at the very first
# low-quality base regardless of the flag.
#
# Returns [new_start, new_end].
def cut_fine_bounds(qual, new_start, new_end)
  seen_ok = false
  offset = @window - 1
  while offset >= 0
    if qual[new_start + offset] < @low
      break if seen_ok
    else
      seen_ok = true
    end
    offset -= 1
  end
  # offset is -1 when the whole window was scanned without stopping.
  new_start += offset + 1

  seen_ok = false
  offset = 0
  while offset < @window
    if qual[new_end + offset] < @low
      break if seen_ok
    else
      seen_ok = true
    end
    offset += 1
  end
  # offset is @window when the whole window was scanned without stopping.
  new_end += offset - 1

  [new_start, new_end]
end
|
173
|
+
|
174
|
+
# Looks for the next high-quality region of +qual+, scanning window start
# positions from +ini+ onwards.
#
# The window sums (@sum) and the last window position (@index_window_end) are
# only computed when ini == 0; calls with a later +ini+ reuse the values
# stored by that first call.
#
# Returns [new_start, new_end]. new_start is negative when
# find_bounds_high_quality found no region; otherwise the bounds have been
# refined by cut_fine_bounds_short or cut_fine_bounds.
def find_high_quality(qual, ini = 0)
  if ini == 0
    # Do not subtract 1 here, or the last nucleotide of the sequence is lost.
    # TODO: in Juan's seqtrim, iwe (called index_window_end here) loses
    # 2 nucleotides of the last computed window.
    @index_window_end = qual.size - @window
    @sum = create_sum_window(qual, ini, @index_window_end)
  end

  hq_start, hq_end = find_bounds_high_quality(@sum, ini, @index_window_end)
  return hq_start, hq_end if hq_start < 0

  # Regions no longer than one window get the "short" refinement.
  if hq_start + @window >= hq_end
    hq_start, hq_end = cut_fine_bounds_short(qual, hq_start, hq_end)
  else
    hq_start, hq_end = cut_fine_bounds(qual, hq_start, hq_end)
  end

  return hq_start, hq_end
end
|
218
|
+
|
219
|
+
|
220
|
+
# Appends to +actions+ an "ActionLowQuality" action covering start..p_begin-1,
# i.e. the part located before a high-quality region that begins at p_begin.
#
# Nothing is added unless p_begin - 1 is positive and at least half of
# @window (integer division). p_end is accepted but not used.
#
# Returns +actions+ when an action was added, nil otherwise.
def add_action_before_high_qual(p_begin, p_end, actions, seq, start)
  leading = p_begin - 1
  return if leading < (@window / 2)
  return unless p_begin > 0 && leading > 0

  actions.push(seq.new_action(start, p_begin - 1, "ActionLowQuality"))
end
|
236
|
+
|
237
|
+
# Appends to +actions+ an "ActionLowQuality" action for the part located after
# a high-quality region that ends at p_end. The action runs from
# p_end - seq.insert_start + 1 to the last index of seq.seq_fasta.
#
# Nothing is added unless seq.insert_end - p_end is positive and at least half
# of @window (integer division), and p_end lies before the last index of
# seq.seq_fasta. p_begin is accepted but not used.
#
# Returns +actions+ when an action was added, nil otherwise.
def add_action_after_high_qual(p_begin, p_end, actions, seq)
  trailing = seq.insert_end - p_end
  return if trailing < (@window / 2)

  last_index = seq.seq_fasta.size - 1
  return unless p_end < last_index && trailing > 0

  actions.push(seq.new_action(p_end - seq.insert_start + 1, seq.seq_fasta.size - 1, "ActionLowQuality"))
end
|
253
|
+
|
254
|
+
|
255
|
+
|
256
|
+
|
257
|
+
|
258
|
+
######################################################################
|
259
|
+
#---------------------------------------------------------------------
|
260
|
+
|
261
|
+
# Begins the plugin's execution with the sequence "seq"
|
262
|
+
# Creates an action by each subsequence with low quality to eliminate it
|
263
|
+
# A subsequence has low quality if (the sum of all its qualities < subsequence_size*20)
|
264
|
+
# Creates the qualities windows from the sequence, looks for the subsequence with high quality
|
265
|
+
# and mark, with an action, the before part to the High Quality Subsequence like a low quality part
|
266
|
+
# Finally mark, with an action, the after part to the High Quality Subsequence like a low quality part
|
267
|
+
#-----------------------------------------------------------------
|
268
|
+
|
269
|
+
# Marks the low-quality parts of +seq+ with "ActionLowQuality" actions.
#
# Reads 'min_quality' and 'window_width' from @params (the window is capped at
# the sequence length), then repeatedly asks find_high_quality for the next
# high-quality region. The gap before each region is turned into an action
# when it is at least half a window long, and each such gap size is recorded
# through add_stats('low_qual', ...). blast_query is accepted but not used.
#
# When this exact class is run on a sequence without qualities an error is
# logged and nothing else happens.
def exec_seq(seq, blast_query)
  plugin_name = self.class.to_s

  if plugin_name == 'PluginLowQuality' && seq.seq_qual.nil?
    $LOG.error " Quality File haven't been provided. It's impossible to execute " + plugin_name
  elsif seq.seq_qual.size > 0
    $LOG.debug "[#{plugin_name}, seq: #{seq.seq_name}]: checking low quality of the sequence"

    @low = @params.get_param('min_quality').to_i

    # The window can never be wider than the sequence itself.
    @window = if @params.get_param('window_width').to_i > seq.seq_fasta.length
                seq.seq_fasta.length
              else
                @params.get_param('window_width').to_i
              end
    @cut_off = @window * @low

    actions = []

    # Bounds of the last high-quality region found, and the end of the one
    # found before it.
    p_begin = 0
    p_end = -1
    p_end_old = nil

    while p_begin >= 0 && p_end + 1 < seq.seq_qual.size
      p_end_old = p_end
      p_begin, p_end = find_high_quality(seq.seq_qual, p_end + 1)

      # Size of the gap between the previous region and the one just found.
      gap = p_begin - p_end_old - 1
      next unless p_begin > 0 && gap >= @window / 2

      add_action_before_high_qual(p_begin, p_end, actions, seq, p_end_old + 1)
      add_stats('low_qual', gap)
    end

    # NOTE(review): this condition is identical to the loop condition above,
    # which is false once the loop has finished, so this branch appears to be
    # unreachable - confirm before relying on it.
    if p_begin >= 0 && p_end + 1 < seq.seq_qual.size
      add_action_after_high_qual(p_begin, p_end, actions, seq)
      add_stats('low_qual', seq.seq_fasta.size - 1 - p_end - seq.insert_start - 1 + 1)
    end

    # The last search returned a negative end: everything after the previous
    # region is marked as low quality.
    if p_end < 0 && p_end_old
      a = seq.new_action(p_end_old + 1, seq.seq_fasta.size - 1, "ActionLowQuality")
      add_stats('low_qual', seq.seq_fasta.size - 1 - p_end_old - 1 + 1)
      actions.push a
    end

    seq.add_actions(actions)
  end
end
|
354
|
+
|
355
|
+
#-----------------------------------------------------------------
|
356
|
+
|
357
|
+
|
358
|
+
######################################################################
|
359
|
+
#---------------------------------------------------------------------
|
360
|
+
|
361
|
+
# Returns an array with the errors caused by missing parameters
|
362
|
+
# Checks the parameters this plugin reads, passing a default value and a
# comment for each one to params.check_param:
#   min_quality  (Integer, default 20)
#   window_width (Integer, default 15)
#
# Returns the errors array that was handed to params.check_param.
def self.check_params(params)
  errors = []

  [
    ['min_quality', 20, 'Minimum quality value for every nucleotide'],
    ['window_width', 15, 'Quality window for scanning low quality segments']
  ].each do |name, default_value, comment|
    params.check_param(errors, name, 'Integer', default_value, comment)
  end

  errors
end
|
378
|
+
|
379
|
+
|
380
|
+
private :find_high_quality
|
381
|
+
|
382
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
require "plugin"
|
2
|
+
|
3
|
+
|
4
|
+
########################################################
|
5
|
+
# Author: Almudena Bocinos Rioboo
|
6
|
+
#
|
7
|
+
# Defines the main methods that are necessary to execute PluginRemAditArtifacts
|
8
|
+
|
9
|
+
#
|
10
|
+
# Inherit: Plugin
|
11
|
+
########################################################
|
12
|
+
|
13
|
+
class PluginRemAditArtifacts < Plugin
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
# Detects additional artifacts at both ends of +seq+ and, when any is found,
# adds one 'ActionRemAditArtifacts' action spanning first..last, the part that
# remains after the artifacts are removed.
#
# Artifacts handled:
#   * repeated GCGGGG / CCCCGC hexamers at the start and at the end;
#   * when the 'is_cDNA' param is 'true': runs of more than 3 T at the start
#     and A at the end for forward reads ('is_forward' is 'true'), or T at the
#     end and A at the start otherwise. This is repeated until the bounds stop
#     changing.
#
# All trimming is done on a private copy of seq.seq_fasta, so the sequence
# object itself is only changed through the action that is added. The
# patterns are anchored with \A / \z so they can only match at the real ends
# of the string. blast_query is accepted but not used.
def exec_seq(seq, blast_query)
  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"

  seq2 = seq.seq_fasta.dup
  first = 0
  last = seq2.size - 1
  old_first = first
  old_last = last

  while seq2 =~ /\A(GCGGGG|CCCCGC)/i
    first += 6
    seq2.slice!(0, 6)
  end

  while seq2 =~ /(GCGGGG|CCCCGC)\z/i
    last -= 6
    seq2.slice!(-6, 6)
  end

  is_forward = @params.get_param('is_forward') == 'true'
  is_cDNA = @params.get_param('is_cDNA') == 'true'

  previous_first, previous_last = 0, 0

  # Keep trimming until a full pass leaves both bounds unchanged.
  until previous_first == first && previous_last == last
    previous_first, previous_last = first, last
    next unless is_cDNA

    if is_forward
      n_ts = seq2[/\AT+/i].to_s.length
      if n_ts > 3
        seq2.slice!(0, n_ts)
        first += n_ts
      end

      n_as = seq2[/A+\z/i].to_s.length
      if n_as > 3
        seq2.slice!(-n_as, n_as)
        last -= n_as
      end
    else # backward read
      n_ts = seq2[/T+\z/i].to_s.length
      if n_ts > 3
        seq2.slice!(-n_ts, n_ts)
        last -= n_ts
      end

      n_as = seq2[/\AA+/i].to_s.length
      if n_as > 3
        seq2.slice!(0, n_as)
        first += n_as
      end
    end
  end

  if (first >= 0 && first > old_first) || (last >= 0 && last < old_last)
    seq.add_actions([seq.new_action(first, last, 'ActionRemAditArtifacts')])
  end
end
|
105
|
+
######################################################################
|
106
|
+
#---------------------------------------------------------------------
|
107
|
+
# Older variant of exec_seq: detects the same end artifacts (GCGGGG / CCCCGC
# hexamers and, for cDNA, runs of more than 3 T / A) and registers the
# remaining first..last span through seq.add_action.
#
# Two fixes compared with the previous version:
#   * trimming is done on a copy of seq.seq_fasta instead of on the string
#     owned by +seq+;
#   * 'is_forward' and 'is_cDNA' are compared with 'true' (as exec_seq does).
#     Used raw, a param value of 'false' is a non-empty String and therefore
#     truthy.
def execute_old(seq)
  seq2 = seq.seq_fasta.dup
  first = 0
  last = seq2.size - 1
  old_first = first
  old_last = last

  while seq2 =~ /\A(GCGGGG|CCCCGC)/i
    first += 6
    seq2.slice!(0, 6)
  end

  while seq2 =~ /(GCGGGG|CCCCGC)\z/i
    last -= 6
    seq2.slice!(-6, 6)
  end

  # to_s lets both a String 'true' and a boolean true enable the option.
  is_forward = @params.get_param('is_forward').to_s == 'true'
  is_cDNA = @params.get_param('is_cDNA').to_s == 'true'

  previous_first, previous_last = 0, 0

  # Keep trimming until a full pass leaves both bounds unchanged.
  until previous_first == first && previous_last == last
    previous_first, previous_last = first, last
    next unless is_cDNA

    if is_forward
      n_ts = seq2[/\AT+/i].to_s.length
      if n_ts > 3
        seq2.slice!(0, n_ts)
        first += n_ts
      end

      n_as = seq2[/A+\z/i].to_s.length
      if n_as > 3
        seq2.slice!(-n_as, n_as)
        last -= n_as
      end
    else # backward read
      n_ts = seq2[/T+\z/i].to_s.length
      if n_ts > 3
        seq2.slice!(-n_ts, n_ts)
        last -= n_ts
      end

      n_as = seq2[/\AA+/i].to_s.length
      if n_as > 3
        seq2.slice!(0, n_as)
        first += n_as
      end
    end
  end

  if (first >= 0 && first > old_first) || (last >= 0 && last < old_last)
    seq.add_action(first, last, 'ActionRemAditArtifacts')
  end
end
|
204
|
+
|
205
|
+
|
206
|
+
|
207
|
+
######################################################################
|
208
|
+
#---------------------------------------------------------------------
|
209
|
+
|
210
|
+
# Returns an array with the errors caused by missing parameters
|
211
|
+
# Parameter validation hook for this plugin. No check is currently active
# (the former checks are kept below, commented out), so the returned list of
# errors is always empty.
def self.check_params(params)
  # if !params.exists?('ta')
  #   errors.push " The param <ta> doesn't exist"
  # end

  # if !params.exists?('poly_at_length')
  #   errors.push " The param <poly_at_length> doesn't exist"
  # end

  []
end
|
228
|
+
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
end
|
data/lib/seqtrimnext.rb
CHANGED
@@ -1,8 +1,4 @@
|
|
1
|
-
|
2
|
-
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
-
|
4
|
-
|
5
|
-
# setup REQUIRE_PATH
|
1
|
+
require "seqtrimnext/version"
|
6
2
|
|
7
3
|
ROOT_PATH=File.join(File.dirname(__FILE__),'seqtrimnext')
|
8
4
|
|
@@ -22,16 +18,6 @@ $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
|
|
22
18
|
|
23
19
|
$: << File.expand_path(File.join(ROOT_PATH, 'latex','classes'))
|
24
20
|
|
25
|
-
# puts $:
|
26
|
-
|
27
21
|
module Seqtrimnext
|
28
|
-
|
29
|
-
# SEQTRIM_VERSION_REVISION=27
|
30
|
-
# SEQTRIM_VERSION_STAGE = 'b'
|
31
|
-
# SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
|
32
|
-
|
33
|
-
VERSION = '2.0.59'
|
34
|
-
|
35
|
-
SEQTRIM_VERSION = VERSION
|
36
|
-
|
22
|
+
# Your code goes here...
|
37
23
|
end
|
data/seqtrimnext.gemspec
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'seqtrimnext/version'
|
5
|
+
|
6
|
+
# Gem specification for seqtrimnext.
Gem::Specification.new do |spec|
  spec.name          = "seqtrimnext"
  spec.version       = Seqtrimnext::VERSION
  spec.authors       = ["Dario Guerrero", "Almudena Bocinos"]
  # The second address used to be split over two lines, which embedded a
  # newline and a stray "t" in the string.
  spec.email         = ["dariogf@gmail.com", "alkoke@gmail.com"]
  spec.summary       = %q{Sequences preprocessing and cleaning software}
  spec.description   = %q{Seqtrimnext is a plugin based system to preprocess and clean sequences from multiple NGS sequencing platforms}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables are those under bin/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  spec.add_runtime_dependency 'narray','>=0'
  spec.add_runtime_dependency 'gnuplot','>=0'
  spec.add_runtime_dependency 'term-ansicolor','>=1.0.5'
  spec.add_runtime_dependency 'xml-simple','>=1.0.12'
  spec.add_runtime_dependency 'scbi_blast','>=0.0.34'
  spec.add_runtime_dependency 'scbi_mapreduce','>=0.0.38'
  spec.add_runtime_dependency 'scbi_fasta','>=0.1.7'
  spec.add_runtime_dependency 'scbi_fastq','>=0.0.18'
  spec.add_runtime_dependency 'scbi_plot','>=0.0.6'
  spec.add_runtime_dependency 'scbi_math','>=0.0.1'
  spec.add_runtime_dependency 'scbi_headers','>=0.0.2'
end
|