seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
require "plugin"
|
|
2
|
+
|
|
3
|
+
########################################################
|
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
|
5
|
+
#
|
|
6
|
+
# Defines the main methods that are necessary to execute PluginExtractInserts
|
|
7
|
+
# Inherit: Plugin
|
|
8
|
+
########################################################
|
|
9
|
+
|
|
10
|
+
class PluginExtractInserts < Plugin
|
|
11
|
+
|
|
12
|
+
#------------------------------------------------------
|
|
13
|
+
# check if part of a vector is in a linker
|
|
14
|
+
#------------------------------------------------------
|
|
15
|
+
# Tells whether range r1 hangs over the left edge of range r2: r1 must start
# strictly before r2 starts and must end somewhere inside r2 (both bounds of
# r2 included).
#
# r1_start, r1_end - bounds of the first range
# r2_start, r2_end - bounds of the second range
#
# Returns true or false.
def part_left_overlap?(r1_start,r1_end,r2_start,r2_end)
  # The checks run in the same order as the original chained condition, so
  # evaluation stops at the first one that fails.
  return false unless r1_start < r2_start
  return false unless r1_end <= r2_end

  r1_end >= r2_start
end
|
|
22
|
+
|
|
23
|
+
# Tells whether range r1 hangs over the right edge of range r2: r1 must end
# strictly after r2 ends and must start somewhere inside r2 (both bounds of
# r2 included).
#
# r1_start, r1_end - bounds of the first range
# r2_start, r2_end - bounds of the second range
#
# Returns true or false.
def part_right_overlap?(r1_start,r1_end,r2_start,r2_end)
  # The checks run in the same order as the original chained condition, so
  # evaluation stops at the first one that fails.
  return false unless r1_end > r2_end
  return false unless r1_start <= r2_end

  r1_start >= r2_start
end
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Creates an insert action, checking whether the insert is short or not, and updates the stats depending on whether it is a left or a right insert
|
|
32
|
+
# Registers one sub-insert found next to a linker.
#
# insert  - two-element array [first, last] with the bounds of the sub-insert
# linker  - object responding to start_pos and end_pos
# actions - array that receives the newly created action
# seq     - sequence object; must respond to insert_start and new_action
#
# The insert size is compared with the 'min_insert_size_paired' parameter:
# an "ActionInsert" is created when the size reaches it, an
# "ActionShortInsert" otherwise. The size is also recorded through add_stats
# under a left/right key, depending on which side of the linker the insert
# starts (nothing is recorded when it starts inside the linker).
# The positions given to new_action are offset by seq.insert_start.
#
# Returns the actions array.
def add_action_inserts(insert,linker,actions,seq)
  first = insert[0]
  last = insert[1]
  insert_size = last - first + 1

  if insert_size >= @params.get_param('min_insert_size_paired').to_i
    left_stat = 'left_insert_size'
    right_stat = 'right_insert_size'
    action_type = "ActionInsert"
  else
    left_stat = 'short_left_insert_size'
    right_stat = 'short_right_insert_size'
    action_type = "ActionShortInsert"
  end

  if first < linker.start_pos
    # the insert lies to the left of the linker
    add_stats(left_stat, insert_size)
  elsif first > linker.end_pos
    # the insert lies to the right of the linker
    add_stats(right_stat, insert_size)
  end

  actions.push seq.new_action(first - seq.insert_start, last - seq.insert_start, action_type)
end
|
|
76
|
+
|
|
77
|
+
#-------------------------------------------------------------------------
|
|
78
|
+
# Creates an ActionInsert or ActionShortInsert before the ActionLinker
|
|
79
|
+
#Used: in class PluginLinker and PluginMid
|
|
80
|
+
#-------------------------------------------------------------------------
|
|
81
|
+
# Adds the action describing the insert located before +overlap+.
#
# overlap - object responding to start_pos
# actions - array that receives the newly created action
# seq     - sequence object; must respond to insert_start and new_action
#
# Nothing is added unless overlap.start_pos is greater than
# seq.insert_start. Otherwise the insert becomes an "ActionInsert" when its
# size reaches the 'min_insert_size_trimmed' parameter, and an
# "ActionShortInsert" when it does not. The action always starts at
# position 0.
#
# NOTE(review): @stats[:insert_size_left] is overwritten on every call,
# whereas other methods of this class record sizes with add_stats; confirm
# this is intended.
#
# Returns the hash stored in @stats[:insert_size_left].
def add_action_before_linker(overlap,actions,seq)
  insert_size = overlap.start_pos - seq.insert_start
  min_insert_size = @params.get_param('min_insert_size_trimmed').to_i

  if overlap.start_pos > seq.insert_start
    action_type = insert_size >= min_insert_size ? "ActionInsert" : "ActionShortInsert"
    actions.push seq.new_action(0, overlap.start_pos - 1 - seq.insert_start, action_type)
  end

  @stats[:insert_size_left] = { insert_size => 1 }
end
|
|
118
|
+
|
|
119
|
+
#-------------------------------------------------------------------------
|
|
120
|
+
# Creates an ActionInsert or ActionShortInsert after the ActionLinker
|
|
121
|
+
#-------------------------------------------------------------------------
|
|
122
|
+
# Adds the action describing the insert located after +overlap+.
#
# overlap - object responding to end_pos
# actions - array that receives the newly created action
# seq     - sequence object; must respond to insert_start, insert_end,
#           seq_fasta, seq_fasta_orig and new_action
#
# Nothing is added unless overlap.end_pos - seq.insert_start is lower than
# the last index of seq.seq_fasta_orig. Otherwise the insert becomes an
# "ActionInsert" when its size reaches the 'min_insert_size_trimmed'
# parameter, and an "ActionShortInsert" when it does not. The action always
# ends at the last index of seq.seq_fasta.
#
# NOTE(review): the guard uses seq_fasta_orig while the action end uses
# seq_fasta; confirm both are meant to differ.
#
# Returns the hash stored in @stats[:insert_size_right].
def add_action_after_linker(overlap,actions,seq)
  insert_size = seq.insert_end - overlap.end_pos
  min_insert_size = @params.get_param('min_insert_size_trimmed').to_i

  if overlap.end_pos - seq.insert_start < seq.seq_fasta_orig.size - 1
    action_type = insert_size >= min_insert_size ? "ActionInsert" : "ActionShortInsert"
    actions.push seq.new_action(overlap.end_pos - seq.insert_start + 1, seq.seq_fasta.size - 1, action_type)
  end

  @stats[:insert_size_right] = { insert_size => 1 }
end
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Cuts the regions covered by +actions+ out of +sub_inserts+, in place.
#
# actions     - collection of objects responding to start_pos and end_pos
# sub_inserts - array of [first, last] pairs; it is modified in place
#
# For every action, each sub-insert already present is examined (from the
# last one to the first one):
#   * fully covered by the action      -> the sub-insert is removed
#   * the action ends inside it        -> it is replaced by the part after
#                                         the action and, when there is one,
#                                         the part before the action
#   * the action only starts inside it -> it is replaced by the part before
#                                         the action
#   * no overlap                       -> it is left untouched
# Pieces are appended at the end of the array, so they are not revisited
# for the current action but are seen by the following ones.
#
# Returns nil when sub_inserts is empty, otherwise the actions collection.
def split_by(actions,sub_inserts)
  return if sub_inserts.empty?

  actions.each do |action|
    sub_inserts.reverse_each do |sub_i|
      sub_first = sub_i[0]
      sub_last = sub_i[1]

      if (action.start_pos <= sub_first) && (action.end_pos >= sub_last)
        # the whole sub-insert is covered: nothing survives
      elsif (action.end_pos >= sub_first) && (action.end_pos + 1 <= sub_last)
        # keep what is left after the action ...
        sub_inserts.push [action.end_pos + 1, sub_last]
        # ... and what is left before it, if anything
        sub_inserts.push [sub_first, action.start_pos - 1] if action.start_pos - 1 >= sub_first
      elsif (action.start_pos - 1 >= sub_first) && (action.start_pos <= sub_last)
        # the action runs up to (or past) the end: keep only the part before it
        sub_inserts.push [sub_first, action.start_pos - 1]
      else
        # the action does not touch this sub-insert
        next
      end

      # the original sub-insert has been replaced by its surviving pieces
      sub_inserts.delete [sub_first, sub_last]
    end
  end
end
|
|
215
|
+
|
|
216
|
+
#select the best subinsert, when there is not a linker
|
|
217
|
+
# Picks the longest sub-insert (used when the sequence has no linker).
#
# sub_inserts - array of [first, last] pairs
#
# Only sub-inserts whose length (last - first + 1) is greater than zero are
# considered; when several share the greatest length the first one wins.
# The array received as argument is not modified.
#
# Returns a new array holding the chosen pair, or an empty array when there
# is no candidate.
def select_the_best(sub_inserts)
  length_of = lambda { |pair| pair[1] - pair[0] + 1 }

  candidates = sub_inserts.select { |pair| length_of.call(pair) > 0 }
  # max_by keeps the first element among equal maxima
  longest = candidates.max_by { |pair| length_of.call(pair) }

  longest.nil? ? [] : [longest]
end
|
|
239
|
+
|
|
240
|
+
#select the best subinsert when there is a linker
|
|
241
|
+
# Picks the longest sub-insert on each side of a linker.
#
# sub_inserts - array of [first, last] pairs
# linker      - object responding to start_pos and end_pos
#
# A sub-insert belongs to the left side when it starts before
# linker.start_pos and to the right side when it starts after
# linker.end_pos; one that starts inside the linker is ignored. On each side
# only lengths greater than zero are considered and the first one wins when
# several share the greatest length.
# The array received as argument is not modified.
#
# Returns a new array with the left winner followed by the right winner;
# a side without candidates contributes nothing.
def select_the_best_with_linker(sub_inserts,linker)
  length_of = lambda { |pair| pair[1] - pair[0] + 1 }

  on_the_left = []
  on_the_right = []
  sub_inserts.each do |pair|
    if pair[0] < linker.start_pos
      on_the_left << pair
    elsif pair[0] > linker.end_pos
      on_the_right << pair
    end
  end

  winners = [on_the_left, on_the_right].map do |side|
    # max_by keeps the first element among equal maxima
    side.select { |pair| length_of.call(pair) > 0 }.max_by { |pair| length_of.call(pair) }
  end

  winners.compact
end
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# Begins the plugin's execution: extracts the inserts of every sequence received
|
|
281
|
+
# Runs exec_seq on every sequence of the batch, in order.
#
# seqs - collection of sequences (anything responding to each)
#
# Returns seqs.
def execute(seqs)
  seqs.each { |one_seq| exec_seq(one_seq) }
end
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# Extracts the insert(s) of one sequence and stores them as actions.
#
# seq - sequence object; must respond to seq_name, get_actions, add_actions,
#       new_action, insert_start, insert_end, seq_rejected(=) and
#       seq_rejected_by_message=
#
# The candidate region(s) are cut by the ActionVectors and ActionLowQuality
# actions already attached to the sequence, then the best remaining piece(s)
# become ActionInsert / ActionShortInsert actions (or an ActionEmptyInsert
# when nothing is left). A sequence that ends up without any ActionInsert is
# rejected with the message 'short insert' or 'empty insert'.
#
# NOTE(review): only sequences with exactly one ActionLinker are treated as
# paired; zero or several linkers both take the "no linker" path.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"

  linkers = seq.get_actions(ActionLinker)
  vectors = seq.get_actions(ActionVectors)
  low_quals = seq.get_actions(ActionLowQuality)

  actions = if linkers.count == 1
    inserts_around_linker(seq, linkers[0], vectors, low_quals)
  else
    insert_without_linker(seq, vectors, low_quals)
  end

  seq.add_actions(actions)

  # Final check: a sequence that was not rejected yet must own an ActionInsert
  return if seq.seq_rejected
  return unless seq.get_actions(ActionInsert).empty?

  seq.seq_rejected = true
  seq.seq_rejected_by_message = seq.get_actions(ActionShortInsert).empty? ? 'empty insert' : 'short insert'
end

# Creates an ActionEmptyInsert for +seq+ and marks the sequence as rejected.
def new_empty_insert_action(seq)
  action = seq.new_action(0,0,'ActionEmptyInsert')
  seq.seq_rejected = true
  seq.seq_rejected_by_message = 'empty insert'
  action
end

# Builds the actions for a sequence with one linker: the best insert on each
# side of it, or an empty-insert action when neither side has one.
def inserts_around_linker(seq, linker, vectors, low_quals)
  actions = []
  sub_inserts = []

  # region before the linker
  sub_inserts.push [seq.insert_start, linker.start_pos - 1] if linker.start_pos > seq.insert_start
  # region after the linker
  sub_inserts.push [linker.end_pos + 1, seq.insert_end] if linker.end_pos < seq.insert_end

  split_by(vectors, sub_inserts)
  split_by(low_quals, sub_inserts)
  sub_inserts = select_the_best_with_linker(sub_inserts, linker)

  actions.push new_empty_insert_action(seq) if sub_inserts.empty?

  # add_action_inserts decides between ActionInsert and ActionShortInsert
  sub_inserts.each { |sub_i| add_action_inserts(sub_i, linker, actions, seq) }

  actions
end

# Builds the single action for a sequence without a usable linker: the
# longest piece of the whole insert, a short insert, or an empty insert.
def insert_without_linker(seq, vectors, low_quals)
  sub_inserts = [[seq.insert_start, seq.insert_end]]

  split_by(vectors, sub_inserts)
  split_by(low_quals, sub_inserts)
  best = select_the_best(sub_inserts).first

  # Decide the empty case before looking at the size threshold: the previous
  # code compared a size of 0 with the threshold first and, when the
  # threshold evaluated to 0 or less, went on to index into the empty list
  # and raised NoMethodError on nil.
  return [new_empty_insert_action(seq)] if best.nil?

  found_insert_size = best[1] - best[0] + 1
  rel_start = best[0] - seq.insert_start
  rel_end = best[1] - seq.insert_start

  if found_insert_size >= @params.get_param('min_insert_size_trimmed').to_i
    add_stats('insert_size', found_insert_size)
    [seq.new_action(rel_start, rel_end, "ActionInsert")]
  else
    add_stats('short_insert_size', found_insert_size)
    action = seq.new_action(rel_start, rel_end, "ActionShortInsert")
    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'short insert'
    [action]
  end
end

private :new_empty_insert_action, :inserts_around_linker, :insert_without_linker
|
|
400
|
+
|
|
401
|
+
# Returns an array with the errors caused by missing parameters
|
|
402
|
+
|
|
403
|
+
# Validates the parameters this plugin needs.
#
# params - parameters object (not inspected at the moment)
#
# No check is active: the one for 'min_insert_size_trimmed' (Integer) is
# disabled, so the list of errors is always empty.
#
# Returns an empty array.
def self.check_params(params)
  # self.check_param(errors,params,'min_insert_size_trimmed','Integer')
  Array.new
end
|
|
410
|
+
|
|
411
|
+
# Configures and draws the graph for one of this plugin's statistics.
#
# stats_value - not used by this method
# stats_name  - name of the statistic; only 'insert_size' is handled
# x, y        - data handed to plot.add_x and plot.add_y
# init_stats  - hash-like object; 'biggest_sequence_size' gives the upper
#               bound of the x range
# plot        - graph object; must respond to x_label=, y_label=, x_range=,
#               add_x, add_y and do_graph
#
# Returns true when the graph was drawn, false when stats_name is not
# handled here.
def self.plot_setup(stats_value,stats_name,x,y,init_stats,plot)
  # same match a `case stats_name when 'insert_size'` performs
  return false unless 'insert_size' === stats_name

  plot.x_label = "Length"
  plot.y_label = "Count"
  plot.x_range = "[0:#{init_stats['biggest_sequence_size'].to_i}]"

  plot.add_x(x)
  plot.add_y(y)
  plot.do_graph

  true
end
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
end
|