seqtrimnext 2.0.51 → 2.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -1,245 +0,0 @@
1
- require "plugin"
2
-
3
-
4
- ########################################################
5
- # Author: Almudena Bocinos Rioboo
6
- #
7
- # Defines the main methods that are necessary to execute PluginRemAditArtifacts
8
-
9
- #
10
- # Inherit: Plugin
11
- ########################################################
12
-
13
class PluginRemAditArtifacts < Plugin

  # Action type attached to a sequence whose limits were moved by this plugin.
  ACTION_TYPE = 'ActionRemAditArtifacts'.freeze

  # Hexamer artifacts removed (repeatedly, case-insensitively) from each end.
  # \A / \z anchor to the whole string; ^ / $ would also match next to an
  # embedded newline while the cut is always made at the very start/end.
  LEADING_ARTIFACT  = /\A(?:GCGGGG|CCCCGC)/i
  TRAILING_ARTIFACT = /(?:GCGGGG|CCCCGC)\z/i
  ARTIFACT_SIZE     = 6

  # Homopolymer runs examined at the ends of cDNA reads.
  LEADING_T  = /\AT+/i
  TRAILING_T = /T+\z/i
  LEADING_A  = /\AA+/i
  TRAILING_A = /A+\z/i

  # A terminal T/A run is only removed when it is longer than this.
  MAX_KEPT_RUN = 3

  # Runs the plugin over every sequence of +seqs+ (anything responding to
  # +each+). Returns whatever +seqs.each+ returns.
  def execute(seqs)
    seqs.each { |seq| exec_seq(seq) }
  end

  # Looks for artifacts at both ends of +seq+ and, when the usable region
  # shrinks, registers one ActionRemAditArtifacts action covering it.
  #
  # +seq+ must respond to +seq_name+, +seq_fasta+, +new_action+ and
  # +add_actions+. The string returned by +seq_fasta+ is never modified.
  def exec_seq(seq)
    $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"

    first, last, original_last = trim_limits(seq.seq_fasta)
    return unless limits_moved?(first, last, original_last)

    seq.add_actions([seq.new_action(first, last, ACTION_TYPE)])
  end

  # Legacy single-sequence entry point. Computes the same limits as
  # +exec_seq+ but reports them through +seq.add_action(first, last, type)+
  # and does not log.
  def execute_old(seq)
    first, last, original_last = trim_limits(seq.seq_fasta)
    return unless limits_moved?(first, last, original_last)

    seq.add_action(first, last, ACTION_TYPE)
  end

  # Returns an array with the errors due to missing parameters.
  # Nothing is mandatory: 'is_cDNA' and 'is_forward' are read at run time and
  # any value other than true/'true' (including a missing one) disables them.
  def self.check_params(params)
    []
  end

  private

  # Computes the trimmed region of +fasta+.
  #
  # Returns [first, last, original_last]: the 0-based inclusive limits left
  # after trimming, plus the last index of the untrimmed string. All trimming
  # is done on a private copy so the caller's string stays intact.
  def trim_limits(fasta)
    working = fasta.to_s.dup # nil is treated as an empty sequence
    original_last = working.size - 1
    first = 0
    last = original_last

    while working =~ LEADING_ARTIFACT
      working.slice!(0, ARTIFACT_SIZE)
      first += ARTIFACT_SIZE
    end

    while working =~ TRAILING_ARTIFACT
      working.slice!(-ARTIFACT_SIZE, ARTIFACT_SIZE)
      last -= ARTIFACT_SIZE
    end

    # One pass is enough: each run is matched greedily, and cutting one end
    # cannot create a new run at the other end unless the string is emptied.
    if flag_enabled?('is_cDNA')
      if flag_enabled?('is_forward')
        first += cut_leading_run(working, LEADING_T)
        last  -= cut_trailing_run(working, TRAILING_A)
      else
        last  -= cut_trailing_run(working, TRAILING_T)
        first += cut_leading_run(working, LEADING_A)
      end
    end

    [first, last, original_last]
  end

  # True when the left limit advanced, or the right limit receded while
  # staying non-negative (same condition the plugin has always used; a read
  # consumed entirely from the right therefore records no action).
  def limits_moved?(first, last, original_last)
    first > 0 || (last >= 0 && last < original_last)
  end

  # True when parameter +name+ is true or the string 'true'.
  def flag_enabled?(name)
    @params.get_param(name).to_s == 'true'
  end

  # Removes the run matched by +pattern+ from the start of +working+ when it
  # is longer than MAX_KEPT_RUN. Returns the number of characters removed.
  def cut_leading_run(working, pattern)
    run = working[pattern]
    return 0 unless run && run.size > MAX_KEPT_RUN

    working.slice!(0, run.size)
    run.size
  end

  # Removes the run matched by +pattern+ from the end of +working+ when it
  # is longer than MAX_KEPT_RUN. Returns the number of characters removed.
  def cut_trailing_run(working, pattern)
    run = working[pattern]
    return 0 unless run && run.size > MAX_KEPT_RUN

    working.slice!(-run.size, run.size)
    run.size
  end

end