seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,267 @@
1
+ ########################################################
2
+ # Author: Almudena Bocinos Rioboo
3
+ #
4
+ # Defines the main methods that are necessary to execute a plugin
5
+ #
6
+ ########################################################
7
+
8
+ require 'string_utils'
9
+ # $: << '/Users/dariogf/progs/ruby/gems/scbi_blast/lib'
10
+
11
+ require 'scbi_blast'
12
+
13
+ class Plugin
14
+
15
+ attr_accessor :stats
16
+
17
+ #Starts the plugin's execution with the sequence "seq"
18
# Builds the plugin and runs it immediately.
#
# seq    - handed unchanged to #execute (the subclasses in this file
#          iterate over it as a collection of sequences).
# params - parameter store, kept in @params for the plugin's methods.
#
# @stats starts as an empty hash; add_stats / add_text_stats fill it.
def initialize(seq, params)
  @stats = {}
  @params = params
  execute(seq)
end
29
+
30
+ #Begins the plugin's execution with the sequences "seqs" (no-op here, overridden by subclasses)
31
# Hook invoked by #initialize. The base implementation ignores +seqs+
# and returns nil; the plugin subclasses in this file override it.
def execute(seqs)
  nil
end
34
+
35
+
36
+ #Initializes the stats structure for the given key and value, only when necessary, and increases its counter
37
# Counts one more occurrence of +value+ under the statistic +key+.
#
# @stats is a two-level hash: @stats[key][value] => count. Missing levels
# are created on demand. Returns the updated count.
def add_stats(key, value)
  bucket = (@stats[key] ||= {})
  bucket[value] = (bucket[value] || 0) + 1
end
48
+
49
+ #Initializes the stats structure for the given key and value, only when necessary, and appends the text to its list
50
# Appends +text+ to the list stored at @stats[key][value].
#
# Both hash levels and the list itself are created on demand.
# Returns the list after the append.
def add_text_stats(key, value, text)
  bucket = (@stats[key] ||= {})
  (bucket[value] ||= []).push(text)
end
61
+
62
# True when the closed ranges [r1_start, r1_end] and [r2_start, r2_end]
# share at least one position (touching end points count as overlap).
def overlapX?(r1_start, r1_end, r2_start, r2_end)
  begins_before_other_ends = r1_start <= r2_end
  begins_before_other_ends && r1_end >= r2_start
end
71
+
72
+ def merge_hits(hits,merged_hits,merged_ids=nil)
73
+ # puts " merging ============"
74
+ hits.each do |hit|
75
+
76
+ merged_ids.push hit.definition if !merged_ids.nil? && (! merged_ids.include?(hit.definition))
77
+ # if new hit's position is already contained in hits, then ignore the new hit
78
+ c=merged_hits.find{|c| overlapX?(hit.q_beg, hit.q_end,c.q_beg,c.q_end)}
79
+ # puts " c #{c.inspect}"
80
+
81
+
82
+ if (c.nil?)
83
+ # add new contaminant
84
+ #puts "NEW HIT #{hit.inspect}"
85
+ merged_hits.push(hit.dup)
86
+ #contaminants.push({:q_begin=>hit.q_beg,:q_end=>hit.q_end,:name=>hit.subject_id})
87
+ #
88
+ else
89
+ # merge with old contaminant
90
+ min=[c.q_beg,hit.q_beg].min
91
+ max=[c.q_end,hit.q_end].max
92
+
93
+ c.q_beg=min
94
+ c.q_end=max
95
+
96
+
97
+ # DONE para describir cada Id contaminante encontrado
98
+ # puts "1 -#{c.subject_id}- -#{hit.subject_id}-"
99
+ c.subject_id += ' ' + hit.subject_id if (not c.subject_id.include?(hit.subject_id))
100
+ # puts "2 -#{c.subject_id}- -#{hit.subject_id}-"
101
+ # puts "MERGE HIT (#{c.inspect})"
102
+
103
+ #
104
+ end
105
+
106
+ end
107
+ end
108
+
109
+
110
+ # def check_length_inserted(p_start,p_end,seq_fasta_length)
111
+ # min_insert_size = @params.get_param('min_insert_size ').to_i
112
+ # v1= p_end.to_i
113
+ # v2= p_start.to_i
114
+ # v3= v1 - v2
115
+ # $LOG.debug "------ #{v3} ----"
116
+ #
117
+ # res = true
118
+ # if ((v1 - v2 + 1) > (seq_fasta_length - min_insert_size ))
119
+ # $LOG.debug "ERROR------ SEQUENCE IS NOT GOOD ----"
120
+ # res = false
121
+ # end
122
+ # return res
123
+ # end
124
+ #------------------------------------------
125
+ # search a key into the sequence
126
+ # Used: in class PluginLinker and PluginMid
127
+ #-------------------------------------------
128
+ # def search_key (seq,key_start,key_end)
129
+ # p_q_beg=0
130
+ # p_q_end=0
131
+ # if (seq.seq_fasta[key_start..key_end]==@params.get_param('key'))
132
+ # actions=[]
133
+ # #Add ActionKey and apply it to cut the sequence
134
+ #
135
+ # type = "ActionKey"
136
+ #
137
+ # p_q_beg,p_q_end=key_start,key_end
138
+ # a = seq.new_action(p_q_beg,p_q_end,type) # adds the actionKey/actionMid to the sequence
139
+ #
140
+ # actions.push a
141
+ #
142
+ # seq.add_actions(actions) #apply cut to the sequence with the actions
143
+ # end
144
+ # return [p_q_beg,p_q_end]
145
+ #
146
+ # end
147
+
148
# Validates one parameter, or installs its default when it is missing.
#
# errors        - array that receives a message for every problem found.
# params        - object responding to exists?, get_param and set_param.
# param         - parameter name.
# param_class   - 'Integer', 'Float' or 'String'; any other value skips
#                 the conversion check.
# default_value - stored through set_param when the parameter is missing;
#                 nil means the parameter is mandatory.
# comment       - passed along to set_param with the default.
def self.check_param(errors, params, param, param_class, default_value = nil, comment = nil)
  if params.exists?(param)
    raw = params.get_param(param)
    begin
      # The Kernel conversion functions raise on values they cannot convert.
      case param_class
      when 'Integer' then Integer(raw)
      when 'Float' then Float(raw)
      when 'String' then String(raw)
      end
    rescue StandardError
      errors.push " The param #{param} is not a valid #{param_class}: ##{raw}#"
    end
  elsif default_value.nil?
    errors.push "The param #{param} is required and thre is no default value available"
  else
    params.set_param(param, default_value, comment)
  end
end
175
+
176
+
177
+
178
+ #Returns an array with the errors due to parameters are missing
179
# Base implementation: nothing is checked, so the error list is always empty.
def self.check_params(params)
  []
end
182
+
183
+
184
# Tells whether the statistic +stats_name+ is excluded from graphing.
#
# A statistic is graphed only when it is not listed in ignored_graphs and
# valid_graphs is either empty or lists it; everything else is ignored.
def self.graph_ignored?(stats_name)
  return true if ignored_graphs.include?(stats_name)

  !(valid_graphs.empty? || valid_graphs.include?(stats_name))
end
193
+
194
+
195
# Base implementation: performs no custom plot setup and always returns false.
def self.plot_setup(stats_value, stats_name, x, y, init_stats, plot)
  false
end
198
+
199
+ # automatically setup data
200
# Fills the plot axes +x+ and +y+ (arrays, appended in place) from a
# { key => count } hash.
#
# When every key converts with Kernel#Integer, x receives the keys as
# integers in ascending order and y the matching counts (via to_i).
# Otherwise x receives each key as a double-quoted label and y the raw
# stored value, in hash order.
#
# Fixes over the previous version:
# * counts are matched to their key whether the key is stored as an Integer
#   or as a String (the old `stats_value[v.to_s]` lookup gave 0 for Integer
#   keys);
# * labels lose plain " and ' characters; the old pattern '\"' only matched
#   a backslash followed by a quote, so bare double quotes survived;
# * non-String keys no longer fail on gsub in the label branch.
#
# stats_name is unused here. The return value is not meaningful.
def self.auto_setup(stats_value, stats_name, x, y)
  all_integers = stats_value.keys.all? do |key|
    begin
      Integer(key)
      true
    rescue StandardError
      false
    end
  end

  if all_integers
    counts = {}
    stats_value.each { |key, count| counts[key.to_i] = count }

    x.concat(counts.keys)
    x.sort!
    x.each { |number| y.push counts[number].to_i }
  else
    stats_value.each do |label, count|
      # Still drops the backslash-quote pair the old code removed, then
      # every remaining quote character.
      clean = label.to_s.gsub('\"', '').delete(%q("'))
      x.push %("#{clean}")
      y.push count
    end
  end
end
249
+
250
# Default graph title: the plugin name and the statistic name joined by '/'.
def self.get_graph_title(plugin_name, stats_name)
  plugin_name + '/' + stats_name
end
253
+
254
# Default graph file name: the plugin name and the statistic name joined by '_'.
def self.get_graph_filename(plugin_name, stats_name)
  plugin_name + '_' + stats_name
end
257
+
258
# Names of statistics that graph_ignored? always rejects; none by default.
def self.ignored_graphs
  []
end
261
+
262
# Whitelist of statistics to graph; an empty list lets graph_ignored?
# accept every name that is not in ignored_graphs.
def self.valid_graphs
  []
end
265
+
266
+
267
+ end
@@ -0,0 +1,189 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAbAdapters
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAbAdapters < Plugin
11
+
12
+ # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
+ MIN_ADAPTER_SIZE = 5
14
+ MIN_FAR_ADAPTER_SIZE = 13
15
+ MIN_LEFT_ADAPTER_SIZE = 9
16
+ #Begins the plugin's execution: blasts the sequences "seqs" and looks for AB adapters in each one
17
# Blasts the whole batch once, then processes each sequence together with
# the blast query found at the same position in the results.
def execute(seqs)
  blast_results = do_blasts(seqs)

  seqs.each_with_index { |sequence, position| exec_seq(sequence, blast_results.querys[position]) }
end
24
+
25
# Runs one blastn-short search for the whole batch against the database
# named by the 'adapters_ab_db' parameter.
#
# Identity comes from 'blast_percent_ab' and the word size from
# MIN_ADAPTER_SIZE. The query is sent as an array of lines: a '>name'
# header followed by the sequence, for every entry of +seqs+.
#
# Returns whatever BatchBlast#do_blast returns.
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('adapters_ab_db')}"
  search_options = " -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}"

  blast = BatchBlast.new(db_option, 'blastn', search_options)
  $LOG.info('BLAST:' + blast.get_blast_cmd)

  fastas = []
  seqs.each { |seq| fastas.push('>' + seq.seq_name, seq.seq_fasta) }

  blast.do_blast(fastas)
end
45
+
46
+
47
# Turns the blast hits of one sequence into AB-adapter actions.
#
# Raises when +blast_query+ does not belong to +seq+. Hits are merged with
# merge_hits until a pass leaves the number of regions unchanged. Each
# merged region is then classified:
#   * it ends within 'max_ab_to_end' positions of the end of seq.seq_fasta
#     -> ActionAbAdapter (right action)
#   * it starts at position <= 4 and spans at least MIN_LEFT_ADAPTER_SIZE
#     -> ActionAbLeftAdapter (left action)
#   * it spans at least MIN_FAR_ADAPTER_SIZE
#     -> ActionAbFarAdapter (right action)
#   * anything else is skipped.
# Accepted regions are counted under 'adapter_type', 'adapter_size' and
# 'adapter_id'. When at least one action was built, the actions are added to
# the sequence and 'sequences_with_adapter' is counted.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"

  adapters = []
  merge_hits(blast_query.hits, adapters)

  # Keep merging until a pass no longer reduces the number of regions.
  loop do
    previous = adapters
    adapters = []
    merge_hits(previous, adapters)
    break if previous.count == adapters.count
  end

  max_to_end = @params.get_param('max_ab_to_end').to_i
  actions = []

  adapters.each do |adapter|
    adapter_size = adapter.q_end - adapter.q_beg + 1

    action_type, type_label, on_right =
      if adapter.q_end >= seq.seq_fasta.length - max_to_end
        ['ActionAbAdapter', 'normal', true]
      elsif (adapter.q_beg <= 4) && (adapter_size >= MIN_LEFT_ADAPTER_SIZE)
        ['ActionAbLeftAdapter', 'left', false]
      elsif adapter_size >= MIN_FAR_ADAPTER_SIZE
        ['ActionAbFarAdapter', 'far', true]
      end
    next if action_type.nil?

    message = adapter.subject_id
    add_stats('adapter_type', type_label)

    action = seq.new_action(adapter.q_beg, adapter.q_end, action_type)
    action.message = message
    action.reversed = adapter.reversed
    if on_right
      action.right_action = true
    else
      action.left_action = true
    end
    actions.push action

    add_stats('adapter_size', adapter_size)
    add_stats('adapter_id', message)
  end

  unless actions.empty?
    seq.add_actions(actions)
    add_stats('sequences_with_adapter', 'count')
  end
end
139
+
140
+ #Returns an array with the errors due to parameters are missing
141
# Registers and validates the parameters this plugin reads, through
# params.check_param (name, expected type, default value, comment).
#
# Returns the array of error messages collected by those calls.
def self.check_params(params)
  errors = []

  [
    ['blast_evalue_ab', 'Float', 1,
     'Blast E-value used as cut-off when searching for 454 AB adapters'],
    ['blast_percent_ab', 'Integer', 95,
     'Minimum required identity (%) for a reliable 454 AB adapter'],
    ['max_ab_to_end', 'Integer', 9,
     '454 AB adapters can be found only at the read end (not within it). The following variable indicates the number of nucleotides that are allowed for considering the AB adapters to be located at the end']
  ].each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  # The database default depends on a global, so it is built only now,
  # right before the call that needs it.
  db_default = File.join($FORMATTED_DB_PATH, 'adapters_ab.fasta')
  params.check_param(errors, 'adapters_ab_db', 'DB', db_default, 'Path for 454 AB adapters database')

  errors
end
163
+
164
# Human-readable graph title for the statistics this plugin names.
#
# Any other statistic used to produce nil; it now falls back to the
# inherited title (Plugin.get_graph_title joins plugin and statistic name
# with '/').
def self.get_graph_title(plugin_name, stats_name)
  case stats_name
  when 'adapter_type' then 'AB adapters by type'
  when 'adapter_size' then 'AB adapters by size'
  else super
  end
end
172
+
173
# Graph files for this plugin are named after the statistic alone;
# +plugin_name+ is not used.
def self.get_graph_filename(plugin_name, stats_name)
  stats_name
end
183
+
184
# Only the 'adapter_type' statistic is whitelisted for graphing.
def self.valid_graphs
  ['adapter_type']
end
187
+
188
+
189
+ end
@@ -0,0 +1,165 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAdapters
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAdapters < Plugin
11
+
12
# Picks the action type for an adapter found between p_start and p_end.
#
# Compares the distance from the adapter start to the left end
# (p_start + seq.insert_start) with the distance from the adapter end to
# the right end (rest of seq.seq_fasta plus what lies beyond seq.insert_end
# in seq.seq_fasta_orig). A strictly smaller left distance gives
# "ActionLeftAdapter"; otherwise, ties included, "ActionRightAdapter".
def get_type_adapter(p_start, p_end, seq)
  adapter_end = p_end.to_i
  adapter_start = p_start.to_i

  left_gap = adapter_start + seq.insert_start
  right_gap = seq.seq_fasta.length - adapter_end - 1 + seq.seq_fasta_orig.length - seq.insert_end - 1

  left_gap < right_gap ? "ActionLeftAdapter" : "ActionRightAdapter"
end
34
+
35
+
36
# Decides whether the adapter should be cut as a right action.
#
# left_size is the span from seq.insert_start to the adapter start and
# right_size the span from the adapter end to seq.insert_end, both floored
# at zero. Returns true when left_size exceeds half of right_size
# (integer division).
def cut_by_right(adapter, seq)
  left_size = [adapter.q_beg - seq.insert_start + 1, 0].max
  right_size = [seq.insert_end - adapter.q_end + 1, 0].max

  left_size > (right_size / 2).to_i
end
46
+
47
+ #Begins the plugin's execution: blasts the sequences "seqs" and looks for adapters in each one
48
# Blasts the whole batch once, then processes each sequence together with
# the blast query found at the same position in the results.
def execute(seqs)
  blast_results = do_blasts(seqs)

  seqs.each_with_index { |sequence, position| exec_seq(sequence, blast_results.querys[position]) }
end
55
+
56
# Runs one blastn-short search for the whole batch against the database
# named by the 'adapters_db' parameter.
#
# The E-value cut-off comes from 'blast_evalue_adapters' and the identity
# from 'blast_percent_adapters'. The query is sent as an array of lines:
# a '>name' header followed by the sequence, for every entry of +seqs+.
#
# Returns whatever BatchBlast#do_blast returns.
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('adapters_db')}"
  search_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}"

  blast = BatchBlast.new(db_option, 'blastn', search_options)
  $LOG.info('BLAST:' + blast.get_blast_cmd)

  fastas = []
  seqs.each { |seq| fastas.push('>' + seq.seq_name, seq.seq_fasta) }

  blast.do_blast(fastas)
end
76
+
77
+
78
# Turns the blast hits of one sequence into adapter actions.
#
# Raises when +blast_query+ does not belong to +seq+. Hits are merged with
# merge_hits until a pass leaves the number of regions unchanged. Every
# merged region becomes an action whose type comes from get_type_adapter
# and whose side (right_action / left_action) comes from cut_by_right.
# Each region's size is counted under 'adapter_size', and all actions are
# handed to seq.add_actions (even when the list is empty).
#
# Fixes over the previous version:
# * merge_hits receives blast_query.hits, as PluginAbAdapters#exec_seq does,
#   instead of the query object itself (an object that is already a plain
#   collection of hits is still accepted);
# * the 'adapter_size' statistic is created once and then accumulated; it
#   used to be reset here, so a batch kept only its last sequence's counts.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"

  hits = blast_query.respond_to?(:hits) ? blast_query.hits : blast_query

  adapters = []
  merge_hits(hits, adapters)

  # Keep merging until a pass no longer reduces the number of regions.
  loop do
    previous = adapters
    adapters = []
    merge_hits(previous, adapters)
    break if previous.count == adapters.count
  end

  # Keep the key present even when no adapter is found, without wiping
  # counts gathered from earlier sequences of the same batch.
  @stats['adapter_size'] ||= {}

  actions = []
  adapters.each do |ad|
    type = get_type_adapter(ad.q_beg, ad.q_end, seq)
    action = seq.new_action(ad.q_beg, ad.q_end, type)

    if cut_by_right(ad, seq)
      action.right_action = true # cut on the right, keep the left insert
    else
      action.left_action = true # cut on the left, keep the right insert
    end

    action.message = ad.subject_id
    action.reversed = ad.reversed
    actions.push action

    add_stats('adapter_size', ad.q_end - ad.q_beg + 1)
  end

  seq.add_actions(actions)
end
144
+
145
+ #Returns an array with the errors due to parameters are missing
146
# Registers and validates the parameters this plugin reads, through
# params.check_param (name, expected type, default value, comment).
#
# Returns the array of error messages collected by those calls.
def self.check_params(params)
  errors = []

  [
    ['blast_evalue_adapters', 'Float', 1e-6,
     'Blast E-value used as cut-off when searching for adapters or primers'],
    ['blast_percent_adapters', 'Integer', 95,
     'Minimum required identity (%) for a reliable adapter']
  ].each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  # The database default depends on a global, so it is built only now,
  # right before the call that needs it.
  db_default = File.join($FORMATTED_DB_PATH, 'adapters.fasta')
  params.check_param(errors, 'adapters_db', 'DB', db_default, 'Path for adapter database')

  errors
end
163
+
164
+
165
+ end