seqtrimnext 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,221 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAmplicons
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAmplicons < Plugin
11
+
12
+ # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
+ # MIN_PRIMER_SIZE = 5
14
+ # MIN_FAR_ADAPTER_SIZE = 13
15
+ # MIN_LEFT_ADAPTER_SIZE = 9
16
# Entry point of the plugin for a batch of sequences: a single BLAST run is
# done for the whole batch (see do_blasts) and every sequence is then
# processed together with the query result stored at the same index.
#
# seqs - collection of sequence objects (must respond to each_with_index)
def execute(seqs)
  batch_results = do_blasts(seqs)

  seqs.each_with_index { |sequence, position| exec_seq(sequence, batch_results.querys[position]) }
end
24
+
25
# Runs one blastn-short search of every sequence in the batch against the
# primers database and returns whatever BatchBlast#do_blast returns.
#
# seqs - collection of sequence objects responding to seq_name and seq_fasta
#
# NOTE(review): 'blast_evalue_primers' is declared in check_params but it is
# not passed to the command built here - confirm whether that is intended.
def do_blasts(seqs)
  db_option     = "-db #{@params.get_param('primers_db')}"
  extra_options = " -task blastn-short -perc_identity #{@params.get_param('blast_percent_primers')}"

  blast = BatchBlast.new(db_option, 'blastn', extra_options)
  $LOG.info('BLAST:' + blast.get_blast_cmd)

  # Input is handed over as an array of lines: a ">name" header followed by
  # the sequence itself, for every sequence in the batch.
  fasta_lines = []
  seqs.each { |s| fasta_lines << (">" + s.seq_name) << s.seq_fasta }

  blast.do_blast(fasta_lines)
end
45
+
46
+
47
# Looks for a primer pair in a single sequence, using its BLAST query result.
#
# seq         - sequence object being processed
# blast_query - BLAST query result for this sequence; its query_id must be
#               equal to seq.seq_name
#
# The sequence is rejected when fewer than two hits reach 'min_primer_size',
# or when its fasta contains an 'N'. Otherwise the two largest hits become an
# ActionLeftPrimer / ActionRightPrimer pair, ordered by their q_beg.
#
# Raises RuntimeError when the query id and the sequence name differ.
def exec_seq(seq, blast_query)
  unless blast_query.query_id == seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"

  # Hits are deliberately not merged, since the subject id of each hit matters.
  hits = blast_query.hits
  size_threshold = @params.get_param('min_primer_size').to_i

  # Keep only hits that are long enough, biggest first.
  long_enough = hits.select { |hit| hit.size >= size_threshold }
  ranked = long_enough.sort { |a, b| a.size <=> b.size }.reverse

  if ranked.count < 2
    # A pair needs at least two primers.
    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'Primer pair not found'
    add_stats('rejected', 'primers_not_found')
  elsif seq.seq_fasta.index('N')
    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'At least one N found'
    add_stats('rejected', 'one_n_found')
  else
    # The two biggest hits, ordered by their start position in the query.
    left_primer, right_primer = ranked[0..1].sort { |a, b| a.q_beg <=> b.q_beg }

    left = seq.new_action(left_primer.q_beg, left_primer.q_end, 'ActionLeftPrimer')
    left.message = left_primer.subject_id
    left.tag_id = left_primer.subject_id
    left.reversed = left_primer.reversed
    left.left_action = true

    add_stats('primer_size', left_primer.size)
    add_stats('primer_id', left_primer.subject_id)

    right = seq.new_action(right_primer.q_beg, right_primer.q_end, 'ActionRightPrimer')
    right.message = right_primer.subject_id
    right.reversed = right_primer.reversed
    right.tag_id = right_primer.subject_id
    right.right_action = true

    add_stats('primer_size', right_primer.size)
    add_stats('primer_id', right_primer.subject_id)

    seq.add_file_tag(2, left_primer.subject_id, :file)
    seq.add_file_tag(2, right_primer.subject_id, :file)

    actions = [left, right]

    unless actions.empty?
      seq.add_actions(actions)
      add_stats('sequences_with_primers', 'count')
    end
  end
end
145
+
146
# Declares the parameters used by this plugin through params.check_param and
# returns the errors array that was handed to every one of those calls.
#
# params - object responding to check_param(errors, name, type, default, comment)
def self.check_params(params)
  errors = []

  # name, type, default value, comment - declared in this exact order
  declarations = [
    ['blast_evalue_primers',  'Float',   1,   'Blast E-value used as cut-off when searching for primers'],
    ['blast_percent_primers', 'Integer', 95,  'Minimum required identity (%) for a reliable primer'],
    ['min_primer_size',       'Integer', 15,  'Minimun primer size'],
    ['primers_db',            'DB',      nil, 'Path for primers database']
  ]

  declarations.each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  errors
end
170
+
171
+ # def self.get_graph_title(plugin_name,stats_name)
172
+ # case stats_name
173
+ # when 'adapter_type'
174
+ # 'AB adapters by type'
175
+ # when 'adapter_size'
176
+ # 'AB adapters by size'
177
+ # end
178
+ # end
179
+ #
180
+ # def self.get_graph_filename(plugin_name,stats_name)
181
+ # return stats_name
182
+ #
183
+ # # case stats_name
184
+ # # when 'adapter_type'
185
+ # # 'AB adapters by type'
186
+ # # when 'adapter_size'
187
+ # # 'AB adapters by size'
188
+ # # end
189
+ # end
190
+ #
191
+ # def self.valid_graphs
192
+ # return ['adapter_type']
193
+ # end
194
+
195
+ # def self.plot_setup(stats_value,stats_name,x,y,init_stats,plot)
196
+ #
197
+ # # puts "============== #{stats_name}"
198
+ #
199
+ # # puts stats_name
200
+ # case stats_name
201
+ #
202
+ # when 'primer_size'
203
+ # plot.x_label= "Length"
204
+ # plot.y_label= "Count"
205
+ # # plot.x_range="[0:#{init_stats['biggest_sequence_size'].to_i}]"
206
+ # plot.x_range="[0:200]"
207
+ # puts x.class
208
+ # plot.add_x(x)
209
+ # plot.add_y(y)
210
+ #
211
+ # plot.do_graph
212
+ #
213
+ # return true
214
+ # else
215
+ # return false
216
+ # end
217
+ #
218
+ # end
219
+
220
+
221
+ end
@@ -0,0 +1,209 @@
1
+ require "plugin"
2
+
3
+ require "make_blast_db"
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines the main methods that are necessary to execute PluginContaminants
8
+ # Inherit: Plugin
9
+ ########################################################
10
+
11
+ class PluginContaminants < Plugin
12
+
13
+
14
+ MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
15
+
16
+
17
# Tells whether hit c lies within half of min_cont_size of either end of the
# sequence.
#
# c             - hit responding to q_beg and q_end
# seq           - sequence object responding to seq_fasta
# min_cont_size - minimum contaminant size; half of it is used as the margin
#
# Returns true or false.
def near_to_extrem(c, seq, min_cont_size)
  margin = (min_cont_size / 2).to_i

  # close to the beginning of the sequence
  return true if (c.q_beg - margin) < 0

  # close to the end of the sequence
  (c.q_end + margin) >= (seq.seq_fasta.size - 1)
end
21
# Entry point of the plugin for a batch of sequences: BLASTs the whole batch
# once (see do_blasts) and then looks for contaminants in each sequence,
# pairing it with the query result stored at the same index.
#
# seqs - collection of sequence objects (must respond to each_with_index)
def execute(seqs)
  batch_results = do_blasts(seqs)

  seqs.each_with_index { |sequence, position| exec_seq(sequence, batch_results.querys[position]) }
end
29
+
30
# Runs one blastn search (XML output) of every sequence in the batch against
# the contaminants database and returns whatever BatchBlast#do_blast returns.
#
# seqs - collection of sequence objects responding to seq_name and seq_fasta
#
# TODO - Culling limit = 2, because blast fails with this command when it is
#        given cl=1 and dust=no and a low complexity sequence as input.
# NOTE(review): the command below still passes "-culling_limit 1" - confirm
#               which value is the intended one.
def do_blasts(seqs)
  db_path  = @params.get_param('contaminants_db')
  evalue   = @params.get_param('blast_evalue_contaminants')
  identity = @params.get_param('blast_percent_contaminants')

  blast = BatchBlast.new("-db #{db_path}", 'blastn', " -task blastn -evalue #{evalue} -perc_identity #{identity} -culling_limit 1")

  $LOG.info('BLAST:' + blast.get_blast_cmd(:xml))

  # Input is handed over as an array of lines: a ">name" header followed by
  # the sequence itself, for every sequence in the batch.
  fasta_lines = []
  seqs.each { |s| fasta_lines << (">" + s.seq_name) << s.seq_fasta }

  blast.do_blast(fasta_lines, :xml)
end
60
+
61
+ # TODO - Contaminants databases grouped by folders
62
+ # TODO - User can select a set of contaminants folders
63
+
64
+
65
# Looks for contaminants in a single sequence, using its BLAST query result.
#
# seq         - sequence object being processed
# blast_query - BLAST query result for this sequence
#
# Hits are merged with merge_hits until the number of merged regions stops
# changing. Every merged region that is at least 'min_contam_seq_presence'
# long, does not match the configured 'genus' and is not inside an
# ActionVectors range becomes an ActionIsContaminated action. When at least
# one action was created and 'contaminants_reject' is 'true', the sequence is
# also marked as rejected.
def exec_seq(seq, blast_query)
  if blast_query.query_id != seq.seq_name
    # A name mismatch is intentionally not fatal here (the raise is disabled):
    # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"

  type = "ActionIsContaminated"

  contaminants = []
  contaminants_ids = []

  # first round: merge the raw hits, collecting the contaminant definitions
  merge_hits(blast_query.hits, contaminants, contaminants_ids)

  # following rounds: keep merging until no more regions get merged
  begin
    contaminants2 = contaminants
    contaminants = []
    merge_hits(contaminants2, contaminants)
  end until (contaminants2.count == contaminants.count)

  actions = []

  # execute calls this method once per sequence on the same plugin instance,
  # so these entries must only be created when missing: assigning a new hash
  # here would throw away whatever previous sequences of the batch stored.
  # (add_stats is defined outside this file; assumed to accumulate counters
  # under @stats[key] - TODO confirm)
  @stats['contaminants_size'] ||= {}
  @stats['rejected_seqs'] ||= {}

  min_cont_size = @params.get_param('min_contam_seq_presence').to_i

  # genus is the same for every hit, so it is fetched only once
  genus = @params.get_param('genus').to_s
  genus_upcase = genus.upcase

  contaminants.each do |c|
    contaminants_size = c.q_end - c.q_beg + 1

    # hits whose definition contains the configured genus are not contaminants
    if !genus.empty? && c.definition.upcase.index(genus_upcase)
      $LOG.info('Contaminant ignored due to genus match: ' + c.definition)
      next
    end

    if contaminants_size < min_cont_size
      $LOG.info('Contaminant ignored due to small size: ' + c.definition)
      next
    end

    # regions already covered by a vector action are skipped
    next if seq.range_inside_action_type?(c.q_beg, c.q_end, ActionVectors)

    a = seq.new_action(c.q_beg, c.q_end, type) # adds the correspondent action to the sequence
    a.message = c.definition
    a.found_definition = contaminants_ids # save the contaminants definitions, each separately
    actions.push a

    add_stats('contaminants_size', contaminants_size)
    contaminants_ids.each do |contaminant_id|
      add_stats('contaminants_ids', contaminant_id)
    end
  end

  reject = @params.get_param('contaminants_reject')

  if !actions.empty? && (reject == 'true')
    # reject the whole sequence
    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'contaminated'

    add_stats('rejected', 'contaminated')
  end

  seq.add_actions(actions)
end
173
+
174
# Declares the parameters used by this plugin through params.check_param and
# returns the errors array that was handed to every one of those calls.
#
# params - object responding to check_param(errors, name, type, default, comment)
def self.check_params(params)
  errors = []

  # name, type, default value, comment - declared in this exact order
  declarations = [
    ['blast_evalue_contaminants', 'Float', 1e-10,
     'Blast E-value used as cut-off when searching for contaminations'],
    ['blast_percent_contaminants', 'Integer', 85,
     'Minimum required identity (%) for a reliable contamination'],
    ['min_contam_seq_presence', 'Integer', 40,
     'Minimum hit size (nt) for considering a true contamination'],
    ['genus', 'String', '',
     'Genus of input data: contaminations belonging to this genus will be ignored'],
    ['contaminants_reject', 'String', 'true',
     'Is a contamination considered a source of sequence rejection? (setting to false will only trim contaminated sequences instead of rejecting the complete read)'],
    ['contaminants_db', 'DB', File.join($FORMATTED_DB_PATH, 'contaminants.fasta'),
     'Path for contaminants database']
  ]

  declarations.each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  errors
end
207
+
208
+
209
+ end