seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,246 @@
1
+ require "plugin"
2
+
3
+
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines the main methods that are necessary to execute PluginRemAditArtifacts
8
+
9
+ #
10
+ # Inherit: Plugin
11
+ ########################################################
12
+
13
+ class PluginRemAditArtifacts < Plugin
14
+
15
+
16
+
17
# Runs the artifact-removal step over a batch of sequences.
#
# seqs:: collection of sequence objects; every element is handed to exec_seq
#        in iteration order.
#
# Returns the value of seqs.each (the collection itself for an Array).
def execute(seqs)
  seqs.each { |one_seq| exec_seq(one_seq) }
end
26
+
27
+
28
# Trims known artifacts from both ends of one sequence and records the range
# that survives as a single ActionRemAditArtifacts action.
#
# Two kinds of artifact are removed:
# * every leading / trailing copy of the hexamers GCGGGG and CCCCGC;
# * when the 'is_cDNA' parameter equals 'true', terminal homopolymer runs
#   longer than 3 nt: leading T's and trailing A's when 'is_forward' equals
#   'true', leading A's and trailing T's otherwise.
#
# seq:: object answering seq_name, seq_fasta, new_action and add_actions.
#
# An action is added only when at least one end moved. Its coordinates are the
# first and last kept positions, counted on the string returned by
# seq.seq_fasta.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"

  # Work on a private copy: the slice! calls below would otherwise edit the
  # very string object returned by seq.seq_fasta.
  seq2 = seq.seq_fasta.dup
  first = 0
  last = seq2.size - 1
  old_first = first
  old_last = last

  # \A and \z anchor on the real string ends (^ and $ are line anchors in Ruby).
  while seq2 =~ /\A(GCGGGG|CCCCGC)/i
    first += 6
    seq2.slice!(0, 6)
  end

  while seq2 =~ /(GCGGGG|CCCCGC)\z/i
    last -= 6
    seq2.slice!(seq2.size - 6, 6)
  end

  is_forward = @params.get_param('is_forward') == 'true'
  is_cDNA = @params.get_param('is_cDNA') == 'true'

  # Which base is expected as a homopolymer at each end depends on direction.
  head_base, tail_base = is_forward ? %w[T A] : %w[A T]
  head_run = /\A(#{head_base}+)/i
  tail_run = /(#{tail_base}+)\z/i

  previous_first, previous_last = 0, 0

  # Repeat until a whole pass leaves both bounds unchanged.
  until (previous_first == first) && (previous_last == last)
    previous_first, previous_last = first, last
    break unless is_cDNA

    run = seq2[head_run, 1]
    if run && run.length > 3
      seq2.slice!(0, run.length)
      first += run.length
    end

    run = seq2[tail_run, 1]
    if run && run.length > 3
      seq2.slice!(seq2.size - run.length, run.length)
      last -= run.length
    end
  end

  # Same reporting condition as before: a right bound that went negative does
  # not, on its own, produce an action.
  if ((first >= 0) && (first > old_first)) || ((last >= 0) && (last < old_last))
    seq.add_actions([seq.new_action(first, last, 'ActionRemAditArtifacts')])
  end
end
117
+ ######################################################################
118
+ #---------------------------------------------------------------------
119
# Legacy single-sequence variant of the artifact trimming: same algorithm as
# exec_seq, but the result is reported through seq.add_action.
#
# Removes leading / trailing GCGGGG and CCCCGC hexamers and, when the 'is_cDNA'
# parameter is 'true', terminal homopolymer runs longer than 3 nt (leading T's
# and trailing A's when 'is_forward' is 'true', leading A's and trailing T's
# otherwise).
#
# seq:: object answering seq_fasta and add_action.
#
# add_action is called only when at least one end moved; it receives the first
# and last kept positions, counted on the string returned by seq.seq_fasta.
def execute_old(seq)
  # Work on a private copy so the slice! calls do not edit the string object
  # returned by seq.seq_fasta.
  seq2 = seq.seq_fasta.dup
  first = 0
  last = seq2.size - 1
  old_first = first
  old_last = last

  # \A and \z anchor on the real string ends (^ and $ are line anchors in Ruby).
  while seq2 =~ /\A(GCGGGG|CCCCGC)/i
    first += 6
    seq2.slice!(0, 6)
  end

  while seq2 =~ /(GCGGGG|CCCCGC)\z/i
    last -= 6
    seq2.slice!(seq2.size - 6, 6)
  end

  # The flags are compared against 'true' as exec_seq does: used directly, the
  # string 'false' would be truthy and enable the cDNA / forward branches.
  is_forward = @params.get_param('is_forward').to_s == 'true'
  is_cDNA = @params.get_param('is_cDNA').to_s == 'true'

  # Which base is expected as a homopolymer at each end depends on direction.
  head_base, tail_base = is_forward ? %w[T A] : %w[A T]
  head_run = /\A(#{head_base}+)/i
  tail_run = /(#{tail_base}+)\z/i

  previous_first, previous_last = 0, 0

  # Repeat until a whole pass leaves both bounds unchanged.
  until (previous_first == first) && (previous_last == last)
    previous_first, previous_last = first, last
    break unless is_cDNA

    run = seq2[head_run, 1]
    if run && run.length > 3
      seq2.slice!(0, run.length)
      first += run.length
    end

    run = seq2[tail_run, 1]
    if run && run.length > 3
      seq2.slice!(seq2.size - run.length, run.length)
      last -= run.length
    end
  end

  if ((first >= 0) && (first > old_first)) || ((last >= 0) && (last < old_last))
    seq.add_action(first, last, 'ActionRemAditArtifacts')
  end
end
216
+
217
+
218
+
219
+ ######################################################################
220
+ #---------------------------------------------------------------------
221
+
222
# Reports the parameter errors for this plugin.
#
# params:: parameter container; it is not consulted, because no parameter is
#          validated here.
#
# Returns an empty Array of error messages.
def self.check_params(params)
  []
end
240
+
241
+
242
+
243
+
244
+
245
+
246
+ end
@@ -0,0 +1,244 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginShortInsert
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginShortInsert < Plugin
11
+
12
# Removes the regions covered by +items+ from the candidate sub-inserts.
#
# items::       objects answering start_pos and end_pos (inclusive bounds).
# sub_inserts:: Array of [start, end] pairs. It is updated in place, so the
#               caller sees the result in the array it passed.
#
# For each item, every sub-insert is either
# * dropped, when the item covers it completely;
# * replaced by the piece after the item and, if any, the piece before it,
#   when the item ends inside the sub-insert;
# * replaced by the piece before the item, when the item starts inside it and
#   reaches its end;
# * kept as is, when the item does not touch it.
#
# Untouched sub-inserts keep their order and new pieces are appended after
# them.
#
# Returns items (the value of items.each).
def cut_by(items, sub_inserts)
  items.each do |item|
    untouched = []
    pieces = []

    # The results are collected in separate arrays and swapped in afterwards.
    # Deleting from sub_inserts while each is walking it shifts the remaining
    # elements and makes the iteration skip the one after every deletion.
    sub_inserts.each do |sub_i|
      sub_beg, sub_end = sub_i[0], sub_i[1]

      if (item.start_pos <= sub_beg) && (item.end_pos >= sub_end)
        # the item covers the whole sub-insert: nothing survives
      elsif (item.end_pos >= sub_beg) && (item.end_pos + 1 <= sub_end)
        pieces.push [item.end_pos + 1, sub_end]                                # part after the item
        pieces.push [sub_beg, item.start_pos - 1] if item.start_pos - 1 >= sub_beg # part before the item
      elsif (item.start_pos - 1 >= sub_beg) && (item.start_pos <= sub_end)
        pieces.push [sub_beg, item.start_pos - 1]                              # part before the item
      else
        untouched.push sub_i
      end
    end

    sub_inserts.replace(untouched + pieces)
  end
end
64
+
65
# Picks the longest candidate sub-insert.
#
# sub_inserts:: Array of [start, end] pairs (inclusive bounds).
#
# Returns a new one-element Array holding the longest pair, or an empty Array
# when no pair has a positive length. On ties the earliest pair wins. The
# array passed in is left untouched.
def select_the_best(sub_inserts)
  longest = sub_inserts.inject(nil) do |winner, candidate|
    winner_len = winner.nil? ? 0 : (winner[1] - winner[0] + 1)
    candidate_len = candidate[1] - candidate[0] + 1
    winner_len < candidate_len ? candidate : winner
  end

  longest.nil? ? [] : [longest]
end
88
+
89
# Runs the insert-size check over a batch of sequences.
#
# seqs:: collection of sequence objects; every element is handed to exec_seq
#        in iteration order.
#
# Returns the value of seqs.each (the collection itself for an Array).
def execute(seqs)
  seqs.each { |one_seq| exec_seq(one_seq) }
end
95
+
96
+
97
# Classifies the insert of one sequence as empty, short or valid and records
# the matching action.
#
# The current insert (seq.insert_start..seq.insert_end) is first cut by the
# ActionLowQuality actions of the sequence, because actions that do not cut by
# themselves are applied here, and the longest remaining piece is the
# candidate.
#
# * length <= 0                            -> ActionEmptyInsert at (0, 0),
#                                             sequence rejected ('empty insert')
# * length < 'min_insert_size_trimmed'     -> ActionShortInsert,
#                                             sequence rejected ('short insert')
# * otherwise                              -> ActionInsert
#
# Action coordinates for the last two cases are relative to seq.insert_start.
#
# seq:: object answering seq_name, seq_fasta, insert_start, insert_end,
#       get_actions, new_action, add_actions and the seq_rejected writers.
#
# Returns the value of seq.add_actions.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"

  # (0, -1) stands for an empty insert: p_end - p_beg + 1 is 0.
  p_beg, p_end = 0, -1
  size_min_insert = nil
  sub_inserts = []

  if seq.seq_fasta.size > 0
    sub_inserts = [[seq.insert_start, seq.insert_end]]
    cut_by(seq.get_actions(ActionLowQuality), sub_inserts)

    unless sub_inserts.empty?
      sub_inserts = select_the_best(sub_inserts)
      p_beg, p_end = sub_inserts[0][0], sub_inserts[0][1] # insert positions
      size_min_insert = @params.get_param('min_insert_size_trimmed').to_i
    end
  end

  actions = []

  if p_end - p_beg + 1 <= 0
    actions.push seq.new_action(0, 0, "ActionEmptyInsert")
    add_stats('short_inserts', 0)

    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'empty insert'
  elsif (p_end - p_beg + 1) < size_min_insert
    a_beg, a_end = p_beg - seq.insert_start, p_end - seq.insert_start

    actions.push seq.new_action(a_beg, a_end, "ActionShortInsert")
    add_stats('short_inserts', a_end - a_beg + 1)

    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'short insert'
  else
    a_beg, a_end = sub_inserts[0][0] - seq.insert_start, sub_inserts[0][1] - seq.insert_start

    actions.push seq.new_action(a_beg, a_end, "ActionInsert")
    add_stats('inserts', a_end - a_beg + 1)
  end

  seq.add_actions(actions)
end
182
+
183
+
184
# Classifies the insert of one sequence by size, without cutting it by
# low-quality regions first.
#
# The insert bounds come from seq.insert_bounds and its length is compared
# with the 'min_insert_size_trimmed' parameter:
#
# * length <= 0        -> ActionEmptyInsert at (0, 0)
# * length < minimum   -> ActionShortInsert at (0, length - 1)
# * otherwise          -> ActionInsert      at (0, length - 1)
#
# seq:: object answering seq_name, insert_bounds, new_action and add_actions.
#
# Returns the value of seq.add_actions.
def execute_no_cut_quality(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"

  p_beg, p_end = seq.insert_bounds
  size_min_insert = @params.get_param('min_insert_size_trimmed').to_i
  insert_len = p_end - p_beg + 1

  action =
    if insert_len <= 0
      seq.new_action(0, 0, "ActionEmptyInsert")
    elsif insert_len < size_min_insert
      seq.new_action(0, p_end - p_beg, "ActionShortInsert")
    else
      seq.new_action(0, p_end - p_beg, "ActionInsert")
    end

  seq.add_actions([action])
end
225
+
226
# Validates the parameters this plugin needs.
#
# params:: parameter container, passed on to check_param together with the
#          error list.
#
# Checks 'min_insert_size_trimmed' as an 'Integer' and returns the Array of
# error messages that check_param filled in.
def self.check_params(params)
  [].tap do |errors|
    self.check_param(errors, params, 'min_insert_size_trimmed', 'Integer')
  end
end
242
+
243
+
244
+ end
@@ -0,0 +1,191 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginVectors
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginVectors < Plugin
11
+
12
+ # MIN_VECTOR_SIZE=30
13
+ # MAX_TO_EXTREME=(MIN_VECTOR_SIZE/2).to_i
14
+ MAX_TARGETS_SEQS=20 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
15
+
16
# Tells whether a hit lies close to either end of the sequence.
#
# c::               hit answering q_beg and q_end.
# seq::             object whose seq_fasta gives the sequence being examined.
# min_vector_size:: half of this value (truncated to an integer) is the
#                   tolerated distance to an end.
#
# Returns true when q_beg minus the margin falls below 0, or when q_end plus
# the margin reaches the last index of seq.seq_fasta; false otherwise.
def near_to_extrem(c, seq, min_vector_size)
  margin = (min_vector_size / 2).to_i

  return true if c.q_beg - margin < 0

  (c.q_end + margin) >= seq.seq_fasta.size - 1
end
20
+
21
# Tells whether a vector hit lies completely inside the first linker found in
# the sequence.
#
# vector_beg, vector_end:: hit bounds; seq.insert_start is added to both
#                          before they are compared with the linker.
# seq::                    object answering get_actions and insert_start.
#
# Returns false when the sequence has no ActionLinker action; otherwise true
# only when the shifted hit starts at or after the linker's start_pos and ends
# at or before its end_pos.
def all_vector_in_linker(vector_beg, vector_end, seq)
  linkers = seq.get_actions(ActionLinker)
  return false unless linkers.count >= 1

  first_linker = linkers[0]
  starts_inside = vector_beg + seq.insert_start >= first_linker.start_pos

  starts_inside && (vector_end + seq.insert_start <= first_linker.end_pos)
end
27
+
28
# Looks for vectors in a batch of sequences.
#
# All sequences are blasted in one call to do_blasts; each sequence is then
# processed by exec_seq together with the entry of blasts.querys found at the
# same position.
#
# seqs:: collection of sequence objects.
#
# Returns the value of seqs.each_with_index (the collection itself for an
# Array).
def execute(seqs)
  blasts = do_blasts(seqs)

  seqs.each_with_index { |one_seq, idx| exec_seq(one_seq, blasts.querys[idx]) }
end
36
+
37
# Blasts every sequence of the batch against the vectors database.
#
# The database comes from the 'vectors_db' parameter; the e-value and
# identity cut-offs come from 'blast_evalue_vectors' and
# 'blast_percent_vectors'.
#
# seqs:: collection of objects answering seq_name and seq_fasta.
#
# Returns whatever blast.do_blast gives back for the :xml format.
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('vectors_db')}"
  extra_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_vectors')} -perc_identity #{@params.get_param('blast_percent_vectors')} -culling_limit 1"
  blast = BatchBlast.new(db_option, 'blastn', extra_options) # get vectors

  $LOG.info('BLAST:'+blast.get_blast_cmd)

  # FASTA input as a flat list of lines: a ">name" header, then the bases.
  fastas = seqs.each_with_object([]) do |seq, lines|
    lines.push ">"+seq.seq_name
    lines.push seq.seq_fasta
  end

  blast.do_blast(fastas, :xml)
end
58
+
59
+
60
# Turns the vector hits of one sequence into actions.
#
# seq::         sequence object answering seq_name, seq_fasta, new_action,
#               add_actions, range_inside_action_type? and the seq_rejected
#               writers.
# blast_query:: blast result for this sequence; only its hits are read.
#
# The hits are merged with merge_hits until a pass no longer changes the
# number of regions. A merged region produces an action when
# * it is near an end of the sequence (near_to_extrem with a fixed size of 10)
#   or at least 'min_vector_seq_presence' long, and
# * 'next_generation_sequences' is 'false', or it is 'true' and the region is
#   not inside an ActionLinker / ActionMultipleLinker.
#
# A region that is not near an end (near_to_extrem with
# 'min_vector_seq_presence') becomes an ActionUnexpectedVector and rejects the
# sequence; any other region becomes an ActionVectors. Actions are marked as
# non-cutting when 'next_generation_sequences' is 'true'.
#
# NOTE(review): the raise that compared blast_query.query_id with seq.seq_name
# was disabled in the original code, so a name mismatch is not detected here.
# NOTE(review): check_params declares 'max_vector_to_end', but the distance
# used for the first near_to_extrem test is the literal 10 - confirm which one
# is intended.
#
# Returns the value of seq.add_actions.
def exec_seq(seq, blast_query)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "

  vectors = []
  vectors_ids = []
  merge_hits(blast_query.hits, vectors, vectors_ids) # first round to save vectors without overlap

  # Further rounds: merge again until the number of regions is stable.
  begin
    vectors2 = vectors
    vectors = []
    merge_hits(vectors2, vectors)
  end until (vectors2.count == vectors.count)

  actions = []
  min_vector_size = @params.get_param('min_vector_seq_presence').to_i
  piro_on = @params.get_param('next_generation_sequences')

  vectors.each do |v|
    vector_size = v.q_end - v.q_beg + 1

    next unless near_to_extrem(v, seq, 10) || (vector_size >= min_vector_size)

    # With next generation sequences, hits contained in a detected linker are
    # ignored.
    outside_linkers = (piro_on == 'true') &&
                      !seq.range_inside_action_type?(v.q_beg, v.q_end, ActionLinker) &&
                      !seq.range_inside_action_type?(v.q_beg, v.q_end, ActionMultipleLinker)
    next unless outside_linkers || (piro_on == 'false')

    # The type is decided per region. Keeping it in a variable shared by all
    # iterations made every region after an unexpected one unexpected too.
    type = 'ActionVectors'

    # A region that is not at an end of the sequence is an unexpected vector.
    unless near_to_extrem(v, seq, min_vector_size)
      type = 'ActionUnexpectedVector'

      seq.seq_rejected = true
      seq.seq_rejected_by_message = 'unexpected vector'

      add_stats('rejected', 'unexpected_vector')
    end

    a = seq.new_action(v.q_beg, v.q_end, type)
    a.message = v.definition
    a.found_definition = vectors_ids # ids collected by the first merge_hits call
    a.reversed = v.reversed
    a.cut = false if piro_on == 'true' # vectors don't cut when piro is on

    actions.push a

    add_stats('vector_size', vector_size)
    vectors_ids.each { |vector_id| add_stats('vectors_ids', vector_id) }
  end

  seq.add_actions(actions)
end
156
+
157
# Validates the parameters used by this plugin, declaring their defaults.
#
# params:: parameter container answering check_param(errors, name, type,
#          default_value, comment).
#
# Checks, in this order: 'blast_evalue_vectors', 'blast_percent_vectors',
# 'max_vector_to_end', 'min_vector_seq_presence' and 'vectors_db'.
#
# Returns the Array of error messages that the check_param calls filled in.
def self.check_params(params)
  errors = []

  # name, type, default value, description
  [
    ['blast_evalue_vectors', 'Float', 1e-1,
     'Blast E-value used as cut-off when searching for vector fragments'],
    ['blast_percent_vectors', 'Integer', 90,
     'Minimum required identity (%) for a reliable vector fragment'],
    ['max_vector_to_end', 'Integer', 8,
     'Correct sequences could contain vectors only close to the read end (not within the sequence). The following variable indicates the number of nucleotides from the 5\' or 3\' end that are allowed for considering a vector fragment located at the end. Otherwise, the vector fragment will be qualified as internal and the sequence will be rejected'],
    ['min_vector_seq_presence', 'Integer', 50,
     'If a vector fragment is qualified as internal, the fragment should be long enough to be sure that it is a true vector fragment. This is the minimum length of a vector fragment that enables sequence rejection by an internal, unexpected vector']
  ].each do |name, kind, default_value, comment|
    params.check_param(errors, name, kind, default_value, comment)
  end

  # The database default is built last, after the checks above have run.
  vectors_db_default = File.join($FORMATTED_DB_PATH, 'vectors.fasta')
  params.check_param(errors, 'vectors_db', 'DB', vectors_db_default, 'Vectors database path')

  errors
end
187
+
188
+
189
+
190
+
191
+ end
@@ -0,0 +1,16 @@
1
+ # ======================================
2
+ # General parameters to extract Amplicons
3
+ # ======================================
4
+
5
+ plugin_list = PluginLowHighSize,PluginKey,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginLowQuality,PluginAmplicons
6
+
7
+ # do not remove cloned sequences
8
+ remove_clonality=false
9
+
10
+ # sequences with different keys (barcodes) are saved to separate folders
11
+ use_independent_folder_for_each_key=true
12
+
13
+ # remove amplicons supported by a number of sequences less than or equal to the value indicated
14
+
15
+ minimal_repetitions_for_amplicons=1
16
+
@@ -0,0 +1,5 @@
1
+ # ======================================
2
+ # General parameters GENOMICS WITH POSSIBLE LINKER
3
+ # ======================================
4
+
5
+ plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginVectors,PluginLowQuality