seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
require "plugin"
|
|
2
|
+
|
|
3
|
+
########################################################
|
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
|
5
|
+
#
|
|
6
|
+
# Defines the main methods that are necessary to execute PluginKey
|
|
7
|
+
# Inherit: Plugin
|
|
8
|
+
########################################################
|
|
9
|
+
|
|
10
|
+
class PluginKey < Plugin
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Begins PluginKey's execution, marking where the key is located in each sequence
|
|
14
|
+
# Runs the key plugin over every sequence in +seqs+.
#
# Before processing, caches in @group_by_key whether the
# 'use_independent_folder_for_each_key' parameter equals the string 'true';
# exec_seq reads that flag for each sequence.
#
# Returns +seqs+ (the value of the iteration).
def execute(seqs)
  folder_flag = @params.get_param('use_independent_folder_for_each_key')
  @group_by_key = (folder_flag == 'true')

  seqs.each { |sequence| exec_seq(sequence) }
end
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Marks the key of a single sequence.
#
# The key is taken to be the first four bases (positions 0..3) of
# seq.seq_fasta. An 'ActionKey' covering those positions is created and
# attached to the sequence. When @group_by_key is set, the sequence also
# receives a directory file tag named 'key_<KEY>' (key upper-cased) and the
# key is recorded in the 'key_tag' stats. The fixed key size (4) is always
# recorded in the 'key_size' stats.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"

  first_pos = 0
  last_pos = 3
  key_length = 4

  # Read the key before creating the action, as the original flow did.
  key_bases = seq.seq_fasta[first_pos..last_pos].upcase

  key_action = seq.new_action(first_pos, last_pos, 'ActionKey')
  seq.add_actions([key_action])

  if @group_by_key
    seq.add_file_tag(0, 'key_' + key_bases, :dir)
    add_stats('key_tag', key_bases)
  end

  add_stats('key_size', key_length)
end
|
|
54
|
+
|
|
55
|
+
# Returns an array with the errors caused by missing or invalid parameters
|
|
56
|
+
# Validates the parameters used by PluginKey.
#
# Asks +params+ to check 'use_independent_folder_for_each_key' (a String,
# default 'false'); any problem is appended to the returned array by
# params.check_param.
#
# Returns the array of errors (empty when nothing was reported).
def self.check_params(params)
  errors = []

  params.check_param(
    errors,
    'use_independent_folder_for_each_key',
    'String',
    'false',
    'sequences containing with diferent keys (barcodes) are saved to separate folders'
  )

  errors
end
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
end
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
require "plugin"
|
|
2
|
+
|
|
3
|
+
########################################################
|
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
|
5
|
+
#
|
|
6
|
+
# Defines the main methods that are necessary to execute PluginLinker
|
|
7
|
+
# Inherit: Plugin
|
|
8
|
+
########################################################
|
|
9
|
+
class PluginLinker < Plugin
|
|
10
|
+
MAX_LINKER_ERRORS=2
|
|
11
|
+
#-------------------------------------------------------------------------
|
|
12
|
+
#It's created an ActionInsert or ActionShortInsert before the ActionLinker
|
|
13
|
+
#Used: in class PluginLinker and PluginMid
|
|
14
|
+
#-------------------------------------------------------------------------
|
|
15
|
+
# def add_action_before_linker(p_q_beg,size_insert,actions,seq)
|
|
16
|
+
#
|
|
17
|
+
# size_min_insert = @params.get_param('size_min_insert').to_i
|
|
18
|
+
# if ((p_q_beg>0) && (size_insert>=size_min_insert)) #if linker's positions are right
|
|
19
|
+
# #It's created an ActionInsert or ActionShortInsert before the ActionLinker
|
|
20
|
+
# a = seq.new_action(0,p_q_beg-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
|
21
|
+
# actions.push a
|
|
22
|
+
# elsif (p_q_beg>0) #if linker's positions are right and insert's size is short
|
|
23
|
+
# #It's created an ActionShortInsert before the ActionLinker
|
|
24
|
+
# a = seq.new_action(0,p_q_beg-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
|
25
|
+
# actions.push a
|
|
26
|
+
# end
|
|
27
|
+
#
|
|
28
|
+
# end
|
|
29
|
+
|
|
30
|
+
#-------------------------------------------------------------------------
|
|
31
|
+
#It's created an ActionInsert or ActionShortInsert after the ActionLinker
|
|
32
|
+
#-------------------------------------------------------------------------
|
|
33
|
+
# def add_action_after_linker(p_q_end,size_insert,actions,seq)
|
|
34
|
+
#
|
|
35
|
+
# size_min_insert = @params.get_param('size_min_insert').to_i
|
|
36
|
+
#
|
|
37
|
+
# if ((p_q_end<seq.seq_fasta.size-1) && (size_insert>=size_min_insert) ) #if linker's positions are right
|
|
38
|
+
# #It's created an ActionInsert after the ActionLinker
|
|
39
|
+
# a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
|
40
|
+
#
|
|
41
|
+
# actions.push a
|
|
42
|
+
#
|
|
43
|
+
# elsif (p_q_end<seq.seq_fasta.size-1) #if linker's positions are right and insert's size is short
|
|
44
|
+
# #It's created an ActionInsert after the ActionLinker
|
|
45
|
+
# a = seq.new_action(p_q_end+1,seq.seq_fasta.size-1,"ActionShortInsert") # adds the ActionInsert to the sequence before adding the actionMid
|
|
46
|
+
#
|
|
47
|
+
# actions.push a
|
|
48
|
+
# end
|
|
49
|
+
#
|
|
50
|
+
# end
|
|
51
|
+
#
|
|
52
|
+
|
|
53
|
+
# Returns the sum of the quality values in +a+.
#
# +a+ is an array of numbers, or nil. A nil argument is treated as an empty
# array and yields 0: Array#[] returns nil when a slice starts past the end
# of the array, and callers pass such slices straight in.
def sum_quals(a)
  return 0 if a.nil?

  # inject folds the values without building the throwaway array that
  # map-with-side-effect created.
  a.inject(0) { |total, qual| total + qual }
end
|
|
60
|
+
|
|
61
|
+
# Collapses hits that cover exactly the same query range.
#
# Walks +hits+ in order and keeps only the first hit seen for every
# (q_beg, q_end) pair; later hits with the same pair are dropped.
#
# Returns a new array; +hits+ itself is not modified.
def merge_hits_with_same_qbeg_and_qend(hits)
  unique_hits = []

  hits.each do |candidate|
    duplicate = unique_hits.any? do |kept|
      kept.q_beg == candidate.q_beg && kept.q_end == candidate.q_end
    end
    next if duplicate

    unique_hits << candidate
  end

  unique_hits
end
|
|
72
|
+
|
|
73
|
+
# Begins PluginLinker's execution, flagging any linker found in each sequence
|
|
74
|
+
# Runs the linker plugin over +seqs+.
#
# All sequences are blasted in a single batch (see do_blasts); each sequence
# is then processed together with the query result found at the same index
# in blast_results.querys.
#
# Returns +seqs+ (the value of the iteration).
def execute(seqs)
  blast_results = do_blasts(seqs)

  seqs.each_with_index { |sequence, index| exec_seq(sequence, blast_results.querys[index]) }
end
|
|
81
|
+
|
|
82
|
+
# Blasts every sequence in +seqs+ against the linkers database.
#
# The BatchBlast is configured with the 'linkers.fasta' database located in
# $FORMATTED_DB_PATH, the blastn-short task, and the e-value / identity
# cut-offs read from the 'blast_evalue_linkers' and 'blast_percent_linkers'
# parameters. The query is passed as an array alternating ">name" header
# lines and sequence lines.
#
# Returns whatever BatchBlast#do_blast returns for that input.
def do_blasts(seqs)
  linkers_db = File.join($FORMATTED_DB_PATH,'linkers.fasta')
  evalue = @params.get_param('blast_evalue_linkers')
  identity = @params.get_param('blast_percent_linkers')

  blast = BatchBlast.new("-db #{linkers_db}",'blastn'," -task blastn-short -evalue #{evalue} -perc_identity #{identity}")

  $LOG.info('BLAST:'+blast.get_blast_cmd)

  # One header line followed by one sequence line per read.
  fasta_lines = []
  seqs.each { |seq| fasta_lines << (">"+seq.seq_name) << seq.seq_fasta }

  blast.do_blast(fasta_lines)
end
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Looks for linkers in a single sequence using its blast result.
#
# seq         - the sequence being processed.
# blast_query - the blast result for that sequence; its query_id must equal
#               seq.seq_name.
#
# Behaviour, by number of distinct linker hits (hits sharing q_beg and q_end
# are merged first):
# * none:  records 'without_linker' in the stats.
# * one:   if gaps + mismatches exceeds MAX_LINKER_ERRORS the sequence is
#          rejected ('linker with mismatches'); otherwise an 'ActionLinker'
#          covering the hit is added.
# * many:  a single 'ActionMultipleLinker' spanning from the smallest q_beg
#          to the largest q_end is added. The quality sums to the left and
#          to the right of that span are compared: when the left sum is
#          greater or equal, right_action is set on the action, otherwise
#          left_action is set.
#
# Raises RuntimeError when the blast result belongs to another sequence.
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"

  actions=[]

  if blast_query.hits.empty? # no linker found
    add_stats('without_linker',0)
  else
    linkers = merge_hits_with_same_qbeg_and_qend(blast_query.hits)

    if linkers.count == 1
      linker = linkers.first
      linker_errors = linker.gaps+linker.mismatches

      if linker_errors > MAX_LINKER_ERRORS
        # Too many gaps/mismatches: reject the whole sequence.
        seq.seq_rejected=true
        seq.seq_rejected_by_message='linker with mismatches'

        add_stats('rejected','by_linker_errors')
        add_stats('linker_errors',linker_errors)
      else
        a = seq.new_action(linker.q_beg,linker.q_end,'ActionLinker')
        a.message = linker.subject_id
        a.tag_id = linker.subject_id
        actions.push a

        add_stats('linker_id',linker.subject_id)
        add_stats('linker_id','total')
      end
    else # multiple linkers found
      linker_count = linkers.count
      first_linker = linkers.first

      span_beg = linkers.map { |l| l.q_beg }.min
      span_end = linkers.map { |l| l.q_end }.max

      a = seq.new_action(span_beg,span_end,'ActionMultipleLinker')
      a.message = "#{linker_count} x #{first_linker.subject_id}"
      a.tag_id = first_linker.subject_id

      # Determine which part (left or right) has the best quality.
      # Array#[] returns nil when the slice starts outside the array (e.g.
      # the span reaches past the last quality value); treat that as an
      # empty side instead of crashing while summing.
      left_quals = seq.seq_qual[0,span_beg] || []
      right_quals = seq.seq_qual[span_end+1..seq.seq_qual.length] || []

      if sum_quals(left_quals) >= sum_quals(right_quals)
        a.right_action=true
      else
        a.left_action=true
      end

      actions.push a

      add_stats('multiple_linker_id',first_linker.subject_id)
      add_stats('multiple_linker_id','total')
      add_stats('multiple_linker_count',linker_count)
    end
  end

  seq.add_actions(actions) unless actions.empty?
end
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# Returns an array with the errors caused by missing or invalid parameters
|
|
216
|
+
# Validates the parameters used by PluginLinker.
#
# Each entry below is handed to params.check_param as
# (name, type, default value, comment); problems are collected in the
# returned array.
#
# Returns the array of errors (empty when nothing was reported).
def self.check_params(params)
  errors = []

  checks = [
    ['blast_evalue_linkers', 'Float', 1e-10,
     'Blast E-value used as cut-off when searching for linkers in paired-ends'],
    ['blast_percent_linkers', 'Integer', 95,
     'Minimum required identity (%) for a reliable linker']
  ]

  checks.each do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  errors
end
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
########################################################
|
|
2
|
+
# Author: Almudena Bocinos Rioboo
|
|
3
|
+
#
|
|
4
|
+
# Defines the main methods that are necessary to execute PluginLowComplexity
|
|
5
|
+
|
|
6
|
+
#
|
|
7
|
+
# Inherit: Plugin
|
|
8
|
+
########################################################
|
|
9
|
+
|
|
10
|
+
require "plugin"
|
|
11
|
+
|
|
12
|
+
MIN_DUST_SIZE = 30
|
|
13
|
+
|
|
14
|
+
class PluginLowComplexity < Plugin
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Runs the low-complexity plugin over +seqs+.
#
# All sequences are dust-masked in a single batch (see do_dust); each
# sequence is then processed with the result found at the same index.
#
# Returns +seqs+ (the value of the iteration).
def execute(seqs)
  dust_results = do_dust(seqs)

  seqs.each_with_index { |sequence, index| exec_seq(sequence, dust_results[index]) }
end
|
|
25
|
+
|
|
26
|
+
# Runs DustMasker over every sequence in +seqs+.
#
# The input handed to the masker is an array alternating ">name" header
# lines and sequence lines.
#
# Returns whatever DustMasker#do_dust returns for that input.
def do_dust(seqs)
  masker = DustMasker.new()

  # One header line followed by one sequence line per read.
  fasta_lines = []
  seqs.each { |seq| fasta_lines << (">"+seq.seq_name) << seq.seq_fasta }

  masker.do_dust(fasta_lines)
end
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Flags the low-complexity regions of a single sequence.
#
# seq        - the sequence being processed.
# dust_query - the dust result for that sequence, or nil when there is none.
#              Its query_id must equal seq.seq_name, and its dust list holds
#              [start, stop] pairs.
#
# Every region of at least MIN_DUST_SIZE bases becomes an
# 'ActionLowComplexity'. When at least one action is created, the total
# dust size (all regions, including the short ones) is recorded in the
# 'low_complexity' stats and the actions are attached to the sequence.
#
# Raises RuntimeError when the dust result belongs to another sequence.
def exec_seq(seq,dust_query)
  # The nil check must come before any access to dust_query; previously it
  # ran after dust_query.query_id and could never take effect.
  return if dust_query.nil?

  if dust_query.query_id != seq.seq_name
    # The message used to interpolate an undefined variable (blast_query),
    # which turned this error into a NameError.
    raise "Dust and seq names does not match, dust:#{dust_query.query_id} sn:#{seq.seq_name}"
  end

  actions=[]
  total_dust=0

  dust_query.dust.each do |dust|
    start=dust[0]
    stop=dust[1]
    dust_size=stop-start+1
    total_dust+=dust_size

    if dust_size>=MIN_DUST_SIZE
      actions.push seq.new_action(start,stop,'ActionLowComplexity')
    end
  end

  if !actions.empty?
    add_stats('low_complexity',total_dust)
    seq.add_actions(actions)
  end
end
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
######################################################################
|
|
83
|
+
#---------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
# Returns an array with the errors caused by missing or invalid parameters
|
|
86
|
+
# Validates the parameters used by PluginLowComplexity.
#
# This plugin currently checks no parameters, so +params+ is not consulted
# and the result is always an empty array.
def self.check_params(params)
  []
end
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
########################################################
|
|
2
|
+
# Author: Almudena Bocinos Rioboo
|
|
3
|
+
#
|
|
4
|
+
# Defines the main methods that are necessary to execute PluginLowHighSize
|
|
5
|
+
|
|
6
|
+
#
|
|
7
|
+
# Inherit: Plugin
|
|
8
|
+
########################################################
|
|
9
|
+
require "plugin"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PluginLowHighSize < Plugin
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Begins PluginLowHighSize's execution over the given sequences
|
|
16
|
+
|
|
17
|
+
# Runs the size check on every sequence in +seqs+.
#
# Returns +seqs+ (the value of the iteration).
def execute(seqs)
  seqs.each { |sequence| exec_seq(sequence) }
end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Rejects a sequence whose raw length is outside the configured limits.
#
# The limits come from the 'min_sequence_size_raw' and
# 'max_sequence_size_raw' parameters, converted with to_i. A sequence that
# is longer than the maximum or shorter than the minimum gets an
# 'ActionLowHighSize', is marked as rejected ('size out of limits') and its
# length is recorded in the 'rejected_seqs' stats. Sequences within the
# limits are left untouched and nil is returned.
#
# NOTE(review): only 'min_sequence_size_raw' is validated by check_params;
# if 'max_sequence_size_raw' is unset, nil.to_i would give a maximum of 0 --
# confirm it is always defined elsewhere.
# NOTE(review): the action ends at the sequence length rather than
# length - 1, while other plugins pass inclusive end positions -- confirm
# this is intended.
def exec_seq(seq)
  $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"

  min_size = @params.get_param('min_sequence_size_raw').to_i
  max_size = @params.get_param('max_sequence_size_raw').to_i

  read_length = seq.seq_fasta.length
  within_limits = (read_length <= max_size) && (read_length >= min_size)
  return if within_limits

  $LOG.debug "#{seq.seq_name} rejected by size #{read_length} "

  size_action = seq.new_action(0, read_length, 'ActionLowHighSize')
  size_action.message = 'low or high size'

  seq.seq_rejected = true
  seq.seq_rejected_by_message = 'size out of limits'

  add_stats('rejected_seqs', read_length)

  seq.add_actions([size_action])
end
|
|
49
|
+
|
|
50
|
+
######################################################################
|
|
51
|
+
#---------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
######################################################################
|
|
56
|
+
#---------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
# Returns an array with the errors caused by missing or invalid parameters
|
|
59
|
+
# Validates the parameters used by PluginLowHighSize.
#
# Asks +params+ to check 'min_sequence_size_raw' (an Integer, default 40);
# any problem is appended to the returned array by params.check_param.
#
# NOTE(review): 'max_sequence_size_raw' is read by exec_seq but is not
# checked here -- confirm whether a check (and default) should be added.
#
# Returns the array of errors (empty when nothing was reported).
def self.check_params(params)
  errors = []

  params.check_param(
    errors,
    'min_sequence_size_raw',
    'Integer',
    40,
    'Minimum size for a raw input sequence to be analysed (shorter reads are directly rejected without further analysis)'
  )

  errors
end
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
end
|