seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,221 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAmplicons
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAmplicons < Plugin
11
+
12
+ # adapters found at end of sequence are even 2 nt wide, cut in 5 because of statistics
13
+ # MIN_PRIMER_SIZE = 5
14
+ # MIN_FAR_ADAPTER_SIZE = 13
15
+ # MIN_LEFT_ADAPTER_SIZE = 9
16
+ # Runs the plugin over a batch of sequences: one batch blast (do_blasts), then exec_seq on each sequence with the blast query found at the same index
17
+ def execute(seqs)
18
+ blasts= do_blasts(seqs)
19
+
20
+ seqs.each_with_index do |s,i|
21
+ exec_seq(s,blasts.querys[i])
22
+ end
23
+ end
24
+
25
+ def do_blasts(seqs)
26
+ # blast all sequences in a single batch against the primers database (blastn-short task, filtered by the blast_percent_primers identity)
27
+ blast=BatchBlast.new("-db #{@params.get_param('primers_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_primers')}")
28
+ $LOG.info('BLAST:'+blast.get_blast_cmd)
29
+
30
+ fastas=[]
31
+
32
+ seqs.each do |seq|
33
+ fastas.push ">"+seq.seq_name
34
+ fastas.push seq.seq_fasta
35
+ end
36
+
37
+ # fastas=fastas.join("\n")
38
+
39
+ blast_table_results = blast.do_blast(fastas)
40
+
41
+ # puts blast_table_results.inspect
42
+
43
+ return blast_table_results
44
+ end
45
+
46
+
47
+ def exec_seq(seq,blast_query)
48
+ if blast_query.query_id != seq.seq_name
49
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
50
+ end
51
+
52
+ $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"
53
+
54
+ # puts blast_query.inspect
55
+
56
+ # merge hits
57
+ # primers=blast_query.merged_hits!
58
+
59
+ # do not merge hits, since id is important
60
+ primers=blast_query.hits
61
+
62
+ min_primer_size=@params.get_param('min_primer_size').to_i
63
+ # puts "MERGED:"
64
+ # puts primers.inspect
65
+
66
+ # type = 'ActionAbAdapter'
67
+ actions=[]
68
+ adapter_size=0
69
+
70
+ # filter primers by size
71
+ primers = primers.select{|primer| (primer.size >= min_primer_size)}.sort{|p1,p2| p1.size<=>p2.size}.reverse
72
+ # puts "FILTERED:"
73
+ # puts primers.inspect
74
+
75
+ # reject sequences with fewer than two primers
76
+ if primers.count < 2
77
+
78
+ seq.seq_rejected=true
79
+ seq.seq_rejected_by_message='Primer pair not found'
80
+
81
+ # @stats[:rejected_seqs]={'rejected_seqs_by_contaminants' => 1}
82
+ add_stats('rejected','primers_not_found')
83
+
84
+ else # has two primers, or more
85
+
86
+ if seq.seq_fasta.index('N')
87
+ seq.seq_rejected=true
88
+ seq.seq_rejected_by_message='At least one N found'
89
+
90
+ # @stats[:rejected_seqs]={'rejected_seqs_by_contaminants' => 1}
91
+ add_stats('rejected','one_n_found')
92
+
93
+ else
94
+ # puts "EL DE ARRIBA"
95
+
96
+ # take the two longest primers (the list is sorted by size, longest first) and order them by q_beg
97
+ left_primer = primers[0..1].sort{|p1,p2| p1.q_beg<=>p2.q_beg}.first
98
+ right_primer = primers[0..1].sort{|p1,p2| p1.q_beg<=>p2.q_beg}.last
99
+
100
+ # puts "LEFT_PRIMER:"
101
+ # puts left_primer.inspect
102
+ # puts "RIGHT_PRIMER:"
103
+ # puts right_primer.inspect
104
+
105
+ # if (left_primer.size>= min_primer_size) && (right_primer.size>= min_primer_size)
106
+
107
+ a = seq.new_action(left_primer.q_beg,left_primer.q_end,'ActionLeftPrimer')
108
+ a.message = left_primer.subject_id
109
+ a.tag_id = left_primer.subject_id
110
+ a.reversed = left_primer.reversed
111
+ a.left_action = true
112
+ actions.push a
113
+
114
+ add_stats('primer_size',left_primer.size)
115
+ add_stats('primer_id',left_primer.subject_id)
116
+
117
+ a = seq.new_action(right_primer.q_beg,right_primer.q_end,'ActionRightPrimer')
118
+ a.message = right_primer.subject_id
119
+ a.reversed = right_primer.reversed
120
+ a.tag_id = right_primer.subject_id
121
+ a.right_action = true
122
+ actions.push a
123
+
124
+ add_stats('primer_size',right_primer.size)
125
+ add_stats('primer_id',right_primer.subject_id)
126
+
127
+ seq.add_file_tag(2, left_primer.subject_id, :file)
128
+ seq.add_file_tag(2, right_primer.subject_id, :file)
129
+
130
+
131
+ # end
132
+
133
+
134
+ if !actions.empty?
135
+ seq.add_actions(actions)
136
+ add_stats('sequences_with_primers','count')
137
+
138
+ # add_stats('sequences',seq.seq_fasta)
139
+ end
140
+
141
+ end
142
+ #
143
+ end
144
+ end
145
+
146
+ # Checks the parameters used by this plugin with params.check_param (passing a default value and a description for each one) and returns the array of errors
147
+ def self.check_params(params)
148
+ errors=[]
149
+
150
+ comment='Blast E-value used as cut-off when searching for primers'
151
+ # default_value = 1e-6
152
+ default_value = 1
153
+ params.check_param(errors,'blast_evalue_primers','Float',default_value,comment)
154
+
155
+ comment='Minimum required identity (%) for a reliable primer'
156
+ default_value = 95
157
+ params.check_param(errors,'blast_percent_primers','Integer',default_value,comment)
158
+
159
+ comment='Minimun primer size'
160
+ default_value = 15
161
+ params.check_param(errors,'min_primer_size','Integer',default_value,comment)
162
+
163
+ comment='Path for primers database'
164
+ # default_value = File.join($FORMATTED_DB_PATH,'adapters_ab.fasta')
165
+ default_value=nil
166
+ params.check_param(errors,'primers_db','DB',default_value,comment)
167
+
168
+ return errors
169
+ end
170
+
171
+ # def self.get_graph_title(plugin_name,stats_name)
172
+ # case stats_name
173
+ # when 'adapter_type'
174
+ # 'AB adapters by type'
175
+ # when 'adapter_size'
176
+ # 'AB adapters by size'
177
+ # end
178
+ # end
179
+ #
180
+ # def self.get_graph_filename(plugin_name,stats_name)
181
+ # return stats_name
182
+ #
183
+ # # case stats_name
184
+ # # when 'adapter_type'
185
+ # # 'AB adapters by type'
186
+ # # when 'adapter_size'
187
+ # # 'AB adapters by size'
188
+ # # end
189
+ # end
190
+ #
191
+ # def self.valid_graphs
192
+ # return ['adapter_type']
193
+ # end
194
+
195
+ # def self.plot_setup(stats_value,stats_name,x,y,init_stats,plot)
196
+ #
197
+ # # puts "============== #{stats_name}"
198
+ #
199
+ # # puts stats_name
200
+ # case stats_name
201
+ #
202
+ # when 'primer_size'
203
+ # plot.x_label= "Length"
204
+ # plot.y_label= "Count"
205
+ # # plot.x_range="[0:#{init_stats['biggest_sequence_size'].to_i}]"
206
+ # plot.x_range="[0:200]"
207
+ # puts x.class
208
+ # plot.add_x(x)
209
+ # plot.add_y(y)
210
+ #
211
+ # plot.do_graph
212
+ #
213
+ # return true
214
+ # else
215
+ # return false
216
+ # end
217
+ #
218
+ # end
219
+
220
+
221
+ end
@@ -0,0 +1,209 @@
1
+ require "plugin"
2
+
3
+ require "make_blast_db"
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines the main methods that are necessary to execute PluginContaminants
8
+ # Inherit: Plugin
9
+ ########################################################
10
+
11
+ class PluginContaminants < Plugin
12
+
13
+
14
+ MAX_TARGETS_SEQS=4 #MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE
15
+
16
+
17
+ def near_to_extrem(c,seq,min_cont_size)
18
+ max_to_extreme=(min_cont_size/2).to_i
19
+ return ((c.q_beg-max_to_extreme<0) || (( c.q_end+max_to_extreme)>=seq.seq_fasta.size-1) ) #return if vector is very near to the extremes of insert)
20
+ end
21
+ # Runs the plugin over a batch of sequences: one batch blast (do_blasts), then exec_seq on each sequence with the blast query found at the same index
22
+ def execute(seqs)
23
+ blasts= do_blasts(seqs)
24
+
25
+ seqs.each_with_index do |s,i|
26
+ exec_seq(s,blasts.querys[i])
27
+ end
28
+ end
29
+
30
+ def do_blasts(seqs)
31
+ # blast all sequences in a single batch against the contaminants database
32
+ # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
33
+
34
+ # TODO - Culling limit = 2 because blast fails with this command when it is given cl=1 and dust=no
35
+ # and a low-complexity sequence as input
36
+
37
+ blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
38
+
39
+ $LOG.info('BLAST:'+blast.get_blast_cmd(:xml))
40
+
41
+ fastas=[]
42
+
43
+ seqs.each do |seq|
44
+ fastas.push ">"+seq.seq_name
45
+ fastas.push seq.seq_fasta
46
+ end
47
+
48
+ # fastas=fastas.join("\n")
49
+ # $LOG.info('doing blast to:')
50
+ # $LOG.info('-'*20)
51
+ # $LOG.info(fastas)
52
+ # $LOG.info('-'*20)
53
+
54
+ blast_table_results = blast.do_blast(fastas,:xml)
55
+
56
+ # $LOG.info(blast_table_results.inspect)
57
+
58
+ return blast_table_results
59
+ end
60
+
61
+ # TODO - Contaminants databases grouped by folders
62
+ # TODO - User can select a set of contaminants folders
63
+
64
+
65
+ def exec_seq(seq,blast_query)
66
+ if blast_query.query_id != seq.seq_name
67
+ # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
68
+ end
69
+
70
+ $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
71
+
72
+
73
+ #blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
74
+ # blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
75
+
76
+
77
+ # blast_table_results = blast.do_blast(seq.seq_fasta,:xml) #rise seq to contaminants executing over blast
78
+
79
+
80
+ #blast_table_results = BlastTableResult.new(res)
81
+
82
+ type = "ActionIsContaminated"
83
+
84
+ contaminants=[]
85
+
86
+ contaminants_ids=[]
87
+
88
+ # blast_table_results.querys.each do |query| #first round to save contaminants without overlap
89
+ # contaminants_ids.push query.hits.definition if (not contaminants_ids.include?(query.hits.definition))
90
+ merge_hits(blast_query.hits,contaminants,contaminants_ids)
91
+ # end
92
+
93
+
94
+
95
+
96
+ begin
97
+ contaminants2=contaminants
98
+ contaminants = [] #second round to save contaminants without overlap
99
+ merge_hits(contaminants2,contaminants)
100
+ # DONE - describe each contaminant ID found: merge_hits(contaminants2,contaminants,ids_contaminants)
101
+ end until (contaminants2.count == contaminants.count)
102
+
103
+
104
+ actions=[]
105
+ contaminants_size=0
106
+
107
+ # @stats[:contaminants_size]={}
108
+ @stats['contaminants_size']={}
109
+ @stats['rejected_seqs']={}
110
+
111
+ min_cont_size=@params.get_param('min_contam_seq_presence').to_i
112
+
113
+ contaminants.each do |c|
114
+ contaminants_size=c.q_end - c.q_beg + 1
115
+ #if ( (@params.get_param('genus')!=c.subject_id.split('_')[1]) &&
116
+ valid_genus=@params.get_param('genus').empty? || !c.definition.upcase.index(@params.get_param('genus').upcase)
117
+
118
+ if (valid_genus) &&
119
+ (contaminants_size>=min_cont_size)
120
+
121
+ #( (min_cont_size<=contaminants_size) || (near_to_extrem(c,seq,min_cont_size)) ) )
122
+
123
+ if !seq.range_inside_action_type?(c.q_beg,c.q_end,ActionVectors)
124
+
125
+ # puts "DIFFERENT SPECIE #{specie} ,#{hit.subject_id.split('_')[1].to_s}"
126
+ a = seq.new_action(c.q_beg,c.q_end,type) # adds the correspondent action to the sequence
127
+ a.message = c.definition
128
+
129
+ a.found_definition = contaminants_ids # save the contaminants definitions, each separately
130
+ actions.push a
131
+
132
+ contaminants_size=c.q_end-c.q_beg+1
133
+
134
+ # if @stats[:contaminants_size][contaminants_size].nil?
135
+ # @stats[:contaminants_size][contaminants_size] = 0
136
+ # end
137
+ #
138
+ # @stats[:contaminants_size][contaminants_size] += 1
139
+ add_stats('contaminants_size',contaminants_size)
140
+ contaminants_ids.each do |c|
141
+ add_stats('contaminants_ids',c)
142
+ end
143
+
144
+ end
145
+ else
146
+ $LOG.info('Contaminant ignored due to genus match: '+c.definition)
147
+ end
148
+ end
149
+
150
+ reject=@params.get_param('contaminants_reject')
151
+ # cond_if=false
152
+ # cond_if=true if (not actions.empty? ) && (reject=='true')
153
+ #
154
+ # puts "Before check SEQ_REJECTED= TRUE (reject= .#{reject}. #{reject.class}&& not actions empty= #{not actions.empty?} ) == #{cond_if} >>> "
155
+
156
+
157
+
158
+ if ((not actions.empty? ) && (reject=='true'))
159
+ #reject sequence
160
+ # puts "SEQ_REJECTED= TRUE >>> "
161
+ seq.seq_rejected=true
162
+ seq.seq_rejected_by_message='contaminated'
163
+
164
+ # @stats[:rejected_seqs]={'rejected_seqs_by_contaminants' => 1}
165
+ add_stats('rejected','contaminated')
166
+
167
+ end
168
+
169
+ seq.add_actions(actions)
170
+
171
+
172
+ end
173
+
174
+ # Checks the parameters used by this plugin with params.check_param (passing a default value and a description for each one) and returns the array of errors
175
+ def self.check_params(params)
176
+ errors=[]
177
+
178
+
179
+ comment='Blast E-value used as cut-off when searching for contaminations'
180
+ default_value = 1e-10
181
+ params.check_param(errors,'blast_evalue_contaminants','Float',default_value,comment)
182
+
183
+ comment='Minimum required identity (%) for a reliable contamination'
184
+ default_value = 85
185
+ params.check_param(errors,'blast_percent_contaminants','Integer',default_value,comment)
186
+
187
+ comment='Minimum hit size (nt) for considering a true contamination'
188
+ default_value = 40
189
+ params.check_param(errors,'min_contam_seq_presence','Integer',default_value,comment)
190
+
191
+ comment='Genus of input data: contaminations belonging to this genus will be ignored'
192
+ default_value = ''
193
+ params.check_param(errors,'genus','String',default_value,comment)
194
+
195
+ comment='Is a contamination considered a source of sequence rejection? (setting to false will only trim contaminated sequences instead of rejecting the complete read)'
196
+ default_value = 'true'
197
+ params.check_param(errors,'contaminants_reject','String',default_value,comment)
198
+
199
+
200
+ comment='Path for contaminants database'
201
+ default_value = File.join($FORMATTED_DB_PATH,'contaminants.fasta')
202
+ params.check_param(errors,'contaminants_db','DB',default_value,comment)
203
+
204
+
205
+ return errors
206
+ end
207
+
208
+
209
+ end