seqtrimnext 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,70 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginKey
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginKey < Plugin
11
+
12
+
13
+ #Begins the pluginKey's execution to warn where is a key in the sequence "seq"
14
# Runs the key plugin over every sequence in +seqs+.
#
# The 'use_independent_folder_for_each_key' parameter is read once per call;
# @group_by_key is true only when that parameter equals the string 'true'.
# exec_seq consults @group_by_key for each sequence.
#
# seqs - collection of sequence objects, each handed to exec_seq.
#
# Returns the value of seqs.each (the collection itself for an Array).
def execute(seqs)
  folder_per_key = @params.get_param('use_independent_folder_for_each_key')
  @group_by_key = (folder_per_key == 'true')

  seqs.each { |sequence| exec_seq(sequence) }
end
21
+
22
+
23
# Marks the leading key of +seq+.
#
# The key is hard-coded as the first four bases (positions 0..3). An
# 'ActionKey' action covering that range is created and added to the
# sequence. When @group_by_key is set, the sequence also receives a
# 'key_<KEY>' file tag of kind :dir and the upper-cased key is recorded under
# the 'key_tag' stat. The 'key_size' stat (always 4) is recorded last.
#
# seq - sequence object responding to seq_name, seq_fasta, new_action,
#       add_actions and add_file_tag.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"

  first_pos = 0
  last_pos = 3
  key_length = 4
  key_bases = seq.seq_fasta[first_pos..last_pos].upcase

  # adds the actionKey to the sequence
  key_action = seq.new_action(first_pos, last_pos, 'ActionKey')
  seq.add_actions([key_action])

  if @group_by_key
    seq.add_file_tag(0, 'key_' + key_bases, :dir)
    add_stats('key_tag', key_bases)
  end

  add_stats('key_size', key_length)
end
54
+
55
+ #Returns an array with the errors due to parameters are missing
56
# Registers/validates the parameters used by this plugin.
#
# params - object responding to check_param(errors, name, type, default, comment).
#
# Returns the errors array that was handed to params.check_param.
def self.check_params(params)
  errors = []

  params.check_param(
    errors,
    'use_independent_folder_for_each_key',
    'String',
    'false',
    'sequences containing with diferent keys (barcodes) are saved to separate folders'
  )

  errors
end
68
+
69
+
70
+ end
@@ -0,0 +1,232 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginLinker
7
+ # Inherit: Plugin
8
+ ########################################################
9
+ class PluginLinker < Plugin
10
+ MAX_LINKER_ERRORS=2
11
+ #-------------------------------------------------------------------------
12
+ #It's created an ActionInsert or ActionShortInsert before the ActionLinker
13
+ #Used: in class PluginLinker and PluginMid
14
+ #-------------------------------------------------------------------------
15
+ # def add_action_before_linker(p_q_beg,size_insert,actions,seq)
16
+ #
17
+ # size_min_insert = @params.get_param('size_min_insert').to_i
18
+ # if ((p_q_beg>0) && (size_insert>=size_min_insert)) #if linker's positions are right
19
+ # #It's created an ActionInsert or ActionShortInsert before the ActionLinker
20
+ # a = seq.new_action(0,p_q_beg-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
21
+ # actions.push a
22
+ # elsif (p_q_beg>0) #if linker's positions are right and insert's size is short
23
+ # #It's created an ActionShortInsert before the ActionLinker
24
+ # a = seq.new_action(0,p_q_beg-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
25
+ # actions.push a
26
+ # end
27
+ #
28
+ # end
29
+
30
+ #-------------------------------------------------------------------------
31
+ #It's created an ActionInsert or ActionShortInsert after the ActionLinker
32
+ #-------------------------------------------------------------------------
33
+ # def add_action_after_linker(p_q_end,size_insert,actions,seq)
34
+ #
35
+ # size_min_insert = @params.get_param('size_min_insert').to_i
36
+ #
37
+ # if ((p_q_end<seq.seq_fasta.size-1) && (size_insert>=size_min_insert) ) #if linker's positions are right
38
+ # #It's created an ActionInsert after the ActionLinker
39
+ # a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
40
+ #
41
+ # actions.push a
42
+ #
43
+ # elsif (p_q_end<seq.seq_fasta.size-1) #if linker's positions are right and insert's size is short
44
+ # #It's created an ActionInsert after the ActionLinker
45
+ # a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
46
+ #
47
+ # actions.push a
48
+ # end
49
+ #
50
+ # end
51
+ #
52
+
53
# Adds up a list of quality values.
#
# a - array of numbers, or nil (treated as an empty list).
#
# Returns the numeric total; 0 for an empty or nil list.
def sum_quals(a)
  # A nil slice (e.g. Array#[] with an out-of-range start) counts as "no values".
  return 0 if a.nil?

  a.inject(0) { |total, quality| total + quality }
end
60
+
61
# Drops hits that cover exactly the same query range as an earlier hit.
#
# hits - list of hit objects responding to q_beg and q_end.
#
# Returns a new array holding, in original order, the first hit seen for each
# distinct (q_beg, q_end) pair.
def merge_hits_with_same_qbeg_and_qend(hits)
  unique_hits = []

  hits.each do |candidate|
    already_kept = unique_hits.any? do |kept|
      kept.q_beg == candidate.q_beg && kept.q_end == candidate.q_end
    end

    unique_hits << candidate unless already_kept
  end

  unique_hits
end
72
+
73
+ # Begins the plugin1's execution to warn that there is linker into the sequence
74
# Runs the linker search for a batch of sequences.
#
# All sequences are blasted in one go (see do_blasts); each sequence is then
# processed by exec_seq together with the query result found at the same
# index in the blast result's querys list.
#
# seqs - indexable collection of sequence objects.
#
# Returns the value of seqs.each_with_index (the collection itself for an Array).
def execute(seqs)
  blast_results = do_blasts(seqs)

  seqs.each_with_index do |sequence, position|
    exec_seq(sequence, blast_results.querys[position])
  end
end
81
+
82
# Blasts every sequence in +seqs+ against the linkers database in one batch.
#
# The search uses blastn with "-task blastn-short" against 'linkers.fasta'
# under $FORMATTED_DB_PATH; the e-value and identity cut-offs come from the
# 'blast_evalue_linkers' and 'blast_percent_linkers' parameters. The blast
# command line is written to $LOG.
#
# seqs - collection of sequence objects responding to seq_name and seq_fasta.
#
# Returns whatever BatchBlast#do_blast returns for the FASTA lines
# (execute reads its per-sequence results through #querys).
def do_blasts(seqs)
  db_option = "-db #{File.join($FORMATTED_DB_PATH,'linkers.fasta')}"
  evalue = @params.get_param('blast_evalue_linkers')
  identity = @params.get_param('blast_percent_linkers')
  search_options = " -task blastn-short -evalue #{evalue} -perc_identity #{identity}"

  linker_blast = BatchBlast.new(db_option, 'blastn', search_options) #get linkers

  $LOG.info('BLAST:' + linker_blast.get_blast_cmd)

  # One header line (">name") followed by one sequence line per input sequence.
  fasta_lines = seqs.inject([]) do |lines, seq|
    lines << (">" + seq.seq_name) << seq.seq_fasta
  end

  linker_blast.do_blast(fasta_lines)
end
103
+
104
+
105
# Looks at the blast hits of one sequence and records the linker(s) found.
#
# seq         - sequence object (seq_name, seq_qual, new_action, add_actions,
#               seq_rejected=, seq_rejected_by_message=).
# blast_query - blast result for this sequence (query_id, hits).
#
# Outcomes:
# * no hits            -> only the 'without_linker' stat (value 0) is recorded.
# * one distinct hit   -> see linker_action_for_single_hit.
# * several distinct hits (different query ranges)
#                      -> see linker_action_for_multiple_hits.
# Any action produced is added to the sequence through seq.add_actions.
#
# Raises RuntimeError when blast_query belongs to a different sequence.
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"

  if blast_query.hits.empty? # no linker found
    add_stats('without_linker', 0)
    return
  end

  linkers = merge_hits_with_same_qbeg_and_qend(blast_query.hits)

  action = if linkers.count == 1
             linker_action_for_single_hit(seq, linkers.first)
           else
             linker_action_for_multiple_hits(seq, linkers)
           end

  seq.add_actions([action]) if action
end

# Handles a sequence with exactly one linker hit.
#
# When gaps + mismatches exceed MAX_LINKER_ERRORS the sequence is flagged as
# rejected, the rejection stats are recorded and nil is returned. Otherwise
# an 'ActionLinker' action spanning the hit is built, labelled with the hit's
# subject_id and returned.
def linker_action_for_single_hit(seq, linker)
  error_count = linker.gaps + linker.mismatches

  if error_count > MAX_LINKER_ERRORS
    seq.seq_rejected = true
    seq.seq_rejected_by_message = 'linker with mismatches'
    add_stats('rejected', 'by_linker_errors')
    add_stats('linker_errors', error_count)
    return nil
  end

  action = seq.new_action(linker.q_beg, linker.q_end, 'ActionLinker')
  action.message = linker.subject_id
  action.tag_id = linker.subject_id

  add_stats('linker_id', linker.subject_id)
  add_stats('linker_id', 'total')

  action
end

# Handles a sequence with several linker hits at different positions.
#
# Builds one 'ActionMultipleLinker' action from the smallest q_beg to the
# largest q_end, labelled after the first hit. The quality totals of the
# stretches left and right of that region are compared: right_action is set
# when the left total is greater or equal, left_action otherwise.
# NOTE(review): presumably the flag marks the side that gets discarded, so
# the better-quality side is kept - confirm against the action classes.
#
# Returns the action.
def linker_action_for_multiple_hits(seq, linkers)
  region_beg = linkers.map { |linker| linker.q_beg }.min
  region_end = linkers.map { |linker| linker.q_end }.max
  first_linker = linkers.first

  action = seq.new_action(region_beg, region_end, 'ActionMultipleLinker')
  action.message = "#{linkers.count} x #{first_linker.subject_id}"
  action.tag_id = first_linker.subject_id

  # Array#[] yields nil for an out-of-range start, and seq_qual itself may be
  # nil; both cases are treated as "no quality values" instead of raising.
  quals = seq.seq_qual || []
  left_quals = quals[0, region_beg] || []
  right_quals = quals[(region_end + 1)..quals.length] || []

  if sum_quals(left_quals) >= sum_quals(right_quals)
    action.right_action = true
  else
    action.left_action = true
  end

  add_stats('multiple_linker_id', first_linker.subject_id)
  add_stats('multiple_linker_id', 'total')
  add_stats('multiple_linker_count', linkers.count)

  action
end

private :linker_action_for_single_hit, :linker_action_for_multiple_hits
213
+
214
+
215
+ #Returns an array with the errors due to parameters are missing
216
# Registers/validates the parameters used by the linker plugin.
#
# params - object responding to check_param(errors, name, type, default, comment).
#
# Returns the errors array that was handed to params.check_param.
def self.check_params(params)
  errors = []

  # name, type, default value, comment
  [
    ['blast_evalue_linkers', 'Float', 1e-10,
     'Blast E-value used as cut-off when searching for linkers in paired-ends'],
    ['blast_percent_linkers', 'Integer', 95,
     'Minimum required identity (%) for a reliable linker']
  ].each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  errors
end
230
+
231
+
232
+ end
@@ -0,0 +1,98 @@
1
+ ########################################################
2
+ # Author: Almudena Bocinos Rioboo
3
+ #
4
+ # Defines the main methods that are necessary to execute PluginLowComplexity
5
+
6
+ #
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ require "plugin"
11
+
12
+ MIN_DUST_SIZE = 30
13
+
14
+ class PluginLowComplexity < Plugin
15
+
16
+
17
+
18
# Runs the low-complexity check for a batch of sequences.
#
# do_dust is run once for the whole batch; each sequence is then processed by
# exec_seq together with the dust result found at the same index.
#
# seqs - indexable collection of sequence objects.
#
# Returns the value of seqs.each_with_index (the collection itself for an Array).
def execute(seqs)
  dust_results = do_dust(seqs)

  seqs.each_with_index do |sequence, position|
    exec_seq(sequence, dust_results[position])
  end
end
25
+
26
# Runs DustMasker once over all sequences in +seqs+.
#
# seqs - collection of sequence objects responding to seq_name and seq_fasta.
#
# Returns whatever DustMasker#do_dust returns for the FASTA lines
# (execute indexes it by sequence position).
def do_dust(seqs)
  masker = DustMasker.new()

  # One header line (">name") followed by one sequence line per input sequence.
  fasta_lines = seqs.inject([]) do |lines, seq|
    lines << (">" + seq.seq_name) << seq.seq_fasta
  end

  masker.do_dust(fasta_lines)
end
45
+
46
+
47
# Turns the dust (low-complexity) regions reported for +seq+ into actions.
#
# seq        - sequence object responding to seq_name, new_action and add_actions.
# dust_query - dust result for this sequence, responding to query_id and dust
#              (a list of [start, stop] pairs), or nil when there is no result.
#
# Every region of at least MIN_DUST_SIZE positions becomes an
# 'ActionLowComplexity' action. When at least one action is created, the
# 'low_complexity' stat is recorded with the summed size of all regions
# (including those below the threshold) and the actions are added to seq.
#
# Raises RuntimeError when dust_query belongs to a different sequence.
def exec_seq(seq,dust_query)
  # Nothing to do without a dust result. Checking this first keeps the name
  # comparison below from failing on nil.
  return if dust_query.nil?

  if dust_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{dust_query.query_id} sn:#{seq.seq_name}"
  end

  actions = []
  total_dust = 0

  dust_query.dust.each do |region|
    start = region[0]
    stop = region[1]
    dust_size = stop - start + 1
    total_dust += dust_size

    next if dust_size < MIN_DUST_SIZE

    actions.push seq.new_action(start, stop, 'ActionLowComplexity')
  end

  return if actions.empty?

  add_stats('low_complexity', total_dust)
  seq.add_actions(actions)
end
79
+
80
+
81
+
82
+ ######################################################################
83
+ #---------------------------------------------------------------------
84
+
85
+ #Returns an array with the errors due to parameters are missing
86
# This plugin declares no parameters of its own, so there is nothing to check.
#
# params - unused.
#
# Returns a new empty errors array.
def self.check_params(params)
  []
end
96
+
97
+
98
+ end
@@ -0,0 +1,74 @@
1
+ ########################################################
2
+ # Author: Almudena Bocinos Rioboo
3
+ #
4
+ # Defines the main methods that are necessary to execute PluginLowHighSize
5
+
6
+ #
7
+ # Inherit: Plugin
8
+ ########################################################
9
+ require "plugin"
10
+
11
+
12
+ class PluginLowHighSize < Plugin
13
+
14
+
15
+ # Begins the plugin_low_high_size's execution with the sequence "seq"
16
+
17
# Runs the size check over every sequence in +seqs+.
#
# seqs - collection of sequence objects, each handed to exec_seq.
#
# Returns the value of seqs.each (the collection itself for an Array).
def execute(seqs)
  seqs.each { |sequence| exec_seq(sequence) }
end
22
+
23
+
24
# Rejects +seq+ when its raw length is outside the configured limits.
#
# The limits are the 'min_sequence_size_raw' and 'max_sequence_size_raw'
# parameters converted with to_i. A sequence shorter than the minimum or
# longer than the maximum gets an 'ActionLowHighSize' action, is flagged as
# rejected with the message 'size out of limits', and its length is recorded
# under the 'rejected_seqs' stat. Sequences within the limits are untouched.
#
# NOTE(review): the action's end position is the sequence length rather than
# length - 1 - confirm that this is what new_action expects.
# NOTE(review): 'max_sequence_size_raw' is not registered by this class's
# check_params - presumably it is set elsewhere; verify.
#
# seq - sequence object responding to seq_name, seq_fasta, new_action,
#       add_actions, seq_rejected= and seq_rejected_by_message=.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"

  lower_limit = @params.get_param('min_sequence_size_raw').to_i
  upper_limit = @params.get_param('max_sequence_size_raw').to_i
  raw_length = seq.seq_fasta.length

  # Within limits: nothing to record.
  return if raw_length <= upper_limit && raw_length >= lower_limit

  $LOG.debug "#{seq.seq_name} rejected by size #{raw_length} "

  size_action = seq.new_action(0, raw_length, 'ActionLowHighSize')
  size_action.message = 'low or high size'
  seq.seq_rejected = true
  seq.seq_rejected_by_message = 'size out of limits'

  add_stats('rejected_seqs', raw_length)
  seq.add_actions([size_action])
end
49
+
50
+ ######################################################################
51
+ #---------------------------------------------------------------------
52
+
53
+
54
+
55
+ ######################################################################
56
+ #---------------------------------------------------------------------
57
+
58
+ #Returns an array with the errors due to parameters are missing
59
# Registers/validates the parameters used by the size plugin.
#
# Only 'min_sequence_size_raw' is declared here.
#
# params - object responding to check_param(errors, name, type, default, comment).
#
# Returns the errors array that was handed to params.check_param.
def self.check_params(params)
  errors = []

  params.check_param(
    errors,
    'min_sequence_size_raw',
    'Integer',
    40,
    'Minimum size for a raw input sequence to be analysed (shorter reads are directly rejected without further analysis)'
  )

  errors
end
71
+
72
+
73
+
74
+ end