seqtrimnext 2.0.45 → 2.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +7 -2
  3. data/bin/filter_database.rb +39 -0
  4. data/bin/join_big_illumina_paired.sh +122 -0
  5. data/bin/seqtrimnext +2 -1
  6. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +87 -121
  7. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +4 -1
  8. data/lib/seqtrimnext/classes/graph_stats.rb +7 -2
  9. data/lib/seqtrimnext/classes/seqtrim.rb +3 -2
  10. data/lib/seqtrimnext/classes/sequence_with_action.rb +1 -1
  11. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +2 -2
  12. data/lib/seqtrimnext/plugins/plugin_adapters.rb +2 -2
  13. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +165 -0
  14. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +2 -2
  15. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +3 -3
  16. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -1
  17. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -1
  18. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -1
  19. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -1
  20. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -1
  21. data/lib/seqtrimnext/plugins/plugin_linker.rb +2 -2
  22. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +1 -1
  23. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +1 -1
  24. data/lib/seqtrimnext/plugins/plugin_mids.rb +2 -2
  25. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +1 -1
  26. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +2 -2
  27. data/lib/seqtrimnext/plugins/plugin_vectors.rb +2 -2
  28. data/lib/seqtrimnext/templates/genomics_illumina.txt +5 -0
  29. data/lib/seqtrimnext/templates/transcriptomics_illumina.txt +8 -0
  30. data/lib/seqtrimnext/utils/hash_stats.rb +2 -1
  31. data/lib/seqtrimnext.rb +1 -1
  32. metadata +14 -5
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.46 2012-04-13
2
+
3
+ Checkpointing activated. Jobs can be restarted where stopped.
4
+
1
5
  === 2.0.45 2012-03-05
2
6
 
3
7
  Improved LowComplexity plugin to ignore low complexity regions inside low qual regions
data/Manifest.txt CHANGED
@@ -4,9 +4,11 @@ bin/extract_seqs_from_fasta.rb
4
4
  bin/extract_seqs_from_fastq.rb
5
5
  bin/fasta2fastq.rb
6
6
  bin/fastq2fasta.rb
7
+ bin/filter_database.rb
7
8
  bin/gen_qual.rb
8
9
  bin/get_seq.rb
9
10
  bin/group_by_range.rb
11
+ bin/join_big_illumina_paired.sh
10
12
  bin/join_ilumina_paired.rb
11
13
  bin/parse_amplicons.rb
12
14
  bin/parse_json_results.rb
@@ -18,6 +20,7 @@ bin/seqtrimnext
18
20
  bin/split_fastq.rb
19
21
  bin/split_ilumina_paired.rb
20
22
  bin/split_paired.rb
23
+ History.txt
21
24
  lib/seqtrimnext/actions/action_ab_adapter.rb
22
25
  lib/seqtrimnext/actions/action_ab_far_adapter.rb
23
26
  lib/seqtrimnext/actions/action_ab_left_adapter.rb
@@ -35,13 +38,13 @@ lib/seqtrimnext/actions/action_low_complexity.rb
35
38
  lib/seqtrimnext/actions/action_low_high_size.rb
36
39
  lib/seqtrimnext/actions/action_low_quality.rb
37
40
  lib/seqtrimnext/actions/action_mid.rb
41
+ lib/seqtrimnext/actions/action_middle_adapter.rb
38
42
  lib/seqtrimnext/actions/action_multiple_linker.rb
39
43
  lib/seqtrimnext/actions/action_paired_reads.rb
40
44
  lib/seqtrimnext/actions/action_poly_a.rb
41
45
  lib/seqtrimnext/actions/action_poly_t.rb
42
46
  lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
43
47
  lib/seqtrimnext/actions/action_right_adapter.rb
44
- lib/seqtrimnext/actions/action_middle_adapter.rb
45
48
  lib/seqtrimnext/actions/action_right_primer.rb
46
49
  lib/seqtrimnext/actions/action_short_insert.rb
47
50
  lib/seqtrimnext/actions/action_unexpected_poly_t.rb
@@ -71,6 +74,7 @@ lib/seqtrimnext/classes/sequence_with_action.rb
71
74
  lib/seqtrimnext/plugins/plugin.rb
72
75
  lib/seqtrimnext/plugins/plugin_ab_adapters.rb
73
76
  lib/seqtrimnext/plugins/plugin_adapters.rb
77
+ lib/seqtrimnext/plugins/plugin_adapters_old.rb
74
78
  lib/seqtrimnext/plugins/plugin_amplicons.rb
75
79
  lib/seqtrimnext/plugins/plugin_contaminants.rb
76
80
  lib/seqtrimnext/plugins/plugin_extract_inserts.rb
@@ -89,9 +93,11 @@ lib/seqtrimnext/plugins/plugin_vectors.rb
89
93
  lib/seqtrimnext/templates/amplicons.txt
90
94
  lib/seqtrimnext/templates/genomics_454.txt
91
95
  lib/seqtrimnext/templates/genomics_454_with_paired.txt
96
+ lib/seqtrimnext/templates/genomics_illumina.txt
92
97
  lib/seqtrimnext/templates/low_quality.txt
93
98
  lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
94
99
  lib/seqtrimnext/templates/transcriptomics_454.txt
100
+ lib/seqtrimnext/templates/transcriptomics_illumina.txt
95
101
  lib/seqtrimnext/templates/transcriptomics_plants.txt
96
102
  lib/seqtrimnext/utils/extract_samples.rb
97
103
  lib/seqtrimnext/utils/fasta2xml.rb
@@ -103,7 +109,6 @@ lib/seqtrimnext/utils/load_qual_in_hash.rb
103
109
  lib/seqtrimnext/utils/recover_mid.rb
104
110
  lib/seqtrimnext/utils/string_utils.rb
105
111
  lib/seqtrimnext.rb
106
- History.txt
107
112
  Manifest.txt
108
113
  PostInstall.txt
109
114
  Rakefile
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fasta'
4
+
5
+ if ARGV.count!=3
6
+ puts "Usage: #{File.basename($0)} database min_size name_list"
7
+ exit
8
+ end
9
+ min_size = ARGV[1].to_i
10
+
11
+ # read keywords
12
+ keywords=File.read(ARGV[2]).split("\n")
13
+
14
+ # convert all to upcase
15
+ keywords.map { |keyword| keyword.upcase!}
16
+
17
+ # puts "Search keywords"
18
+ # keywords.each { |keyword| puts keyword}
19
+
20
+ fqr=FastaQualFile.new(ARGV[0])
21
+
22
+ all=[]
23
+
24
+ fqr.each do |n,s,c|
25
+ keywords.each do |keyword|
26
+ if s.length<=min_size
27
+ # all+=c.split(" ")
28
+ if c.upcase.index(keyword)
29
+ # puts "[#{s.length.to_s}] - #{n} - #{c}"
30
+ puts ">#{n} #{c}\n#{s}"
31
+ break
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ # puts all.sort.uniq.reject{|e| e=~/\d/}
38
+
39
+ fqr.close
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # Sort two big Illumina files from a paired-end experiment, then write the sequences common to both into separate paired files. Sequences that are not in common go to separate files.
4
+
5
+ # cat $1 | awk '{split($0, a, " "); n++; if (n%1==0){printf("%s\t",a[1]);}; printf("%s",$0); if(n%4==0) { printf("\n");} else { printf("\t");} }'
6
+ #
7
+ # exit
8
+
9
+ if [ "$#" -lt 3 ]; # numeric test ('<' inside [ ] is a redirection); tmp_dir (4th argument) is optional and defaults to the current directory below
10
+ then
11
+ echo ""
12
+ echo "Use: $0 file1.fastq file2.fastq base_output_name tmp_dir"
13
+ echo ""
14
+ exit
15
+ fi
16
+
17
+ base_name=$3
18
+
19
+ if [[ -z "$base_name" ]]; then
20
+ echo "Use: base_output_name (third argument) must not be empty" # base_name is empty in this branch, so do not interpolate it
21
+ exit -1
22
+ fi
23
+
24
+ tmp_dir=$4
25
+
26
+ if [[ -z "$4" ]]; then
27
+ tmp_dir=`pwd`
28
+ fi
29
+
30
+ if [[ ! -e "$tmp_dir" ]]; then
31
+ echo "Tmp dir: $4 doesn't exists"
32
+ exit -1
33
+ fi
34
+
35
+ echo "Using TMPDIR $tmp_dir"
36
+
37
+ f1_path=$1
38
+ f2_path=$2
39
+
40
+ f1_name=`basename $1`
41
+ f2_name=`basename $2`
42
+
43
+ f1_tmp="$tmp_dir/${f1_name}"
44
+ f2_tmp="$tmp_dir/${f2_name}"
45
+
46
+ common_names="$tmp_dir/comm.names"
47
+
48
+ only_in_1="$tmp_dir/only_in_1.txt"
49
+ only_in_2="$tmp_dir/only_in_2.txt"
50
+ in_both="$tmp_dir/in_both.txt"
51
+
52
+
53
+ echo "Starting sorting"
54
+
55
+ if [[ ! -e "$f1_tmp.sorted" ]]; then
56
+ echo "Sorting $f1_name"
57
+ cat $f1_path | awk '{split($0, a, " "); sub(/\/1$/,"\t", a[1]); n++; if (n%4==1){printf("%s",a[1]);}; printf("%s",$0); if(n%4==0) { printf("\n");} else { printf("\t");} }' | sort -T $tmp_dir -k1,1 -t $'\t' > $f1_tmp.sorted &
58
+
59
+ fi
60
+
61
+ if [[ ! -e "$f2_tmp.sorted" ]]; then
62
+ echo "Sorting $f2_name"
63
+ cat $f2_path | awk '{split($0, a, " "); sub(/\/2$/,"\t", a[1]); n++; if (n%4==1){printf("%s",a[1]);}; printf("%s",$0); if(n%4==0) { printf("\n");} else { printf("\t");} }' | sort -T $tmp_dir -k1,1 -t $'\t' > $f2_tmp.sorted &
64
+ fi
65
+ wait
66
+
67
+ echo "Starting name extraction"
68
+ if [[ ! -e "$f1_tmp.names" ]]; then
69
+ echo "Extracting names from $f1_tmp.sorted"
70
+ # cat $1.sorted | cut -f1 | sed 's/\(.*\)\/1$/\1/' > $1.names &
71
+ cat $f1_tmp.sorted | cut -f1 > $f1_tmp.names &
72
+ fi
73
+ if [[ ! -e "$f2_tmp.names" ]]; then
74
+ echo "Extracting names from $f2_tmp.sorted"
75
+ cat $f2_tmp.sorted | cut -f1 > $f2_tmp.names &
76
+ fi
77
+ wait
78
+
79
+ echo "Starting names comparison"
80
+ if [[ ! -e "$common_names" ]]; then
81
+ echo "Making comm file"
82
+ # diff $1.names $2.names > names.diff
83
+ comm $f1_tmp.names $f2_tmp.names > $common_names
84
+ fi
85
+
86
+ echo "Starting names extraction"
87
+ # grep '^>' names.diff | cut -d ' ' -f2 | awk '{ printf("%s/2\n",$0) }' > only_in_2.txt &
88
+ # grep '^<' names.diff | cut -d ' ' -f2 | awk '{ printf("%s/1\n",$0) }' > only_in_1.txt &
89
+
90
+ grep -P '^[^\t]' $common_names > $only_in_1 &
91
+ grep -P '^\t[^\t]' $common_names |tr -d "\t" > $only_in_2 &
92
+ grep -P '^\t\t[^\t]' $common_names |tr -d "\t" > $in_both &
93
+ wait
94
+
95
+ echo "Num seqs only in 1) $f1_name"
96
+ wc -l $only_in_1
97
+
98
+ echo "Num seqs only in 2) $f2_name"
99
+ wc -l $only_in_2
100
+
101
+ echo "Num seqs in both $f1_name and $f2_name"
102
+ wc -l $in_both
103
+
104
+ echo "Starting extracting seqs"
105
+ join -t $'\t' -1 1 -2 1 $only_in_1 $f1_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_normal1.fastq &
106
+ join -t $'\t' -1 1 -2 1 $only_in_2 $f2_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_normal2.fastq &
107
+
108
+ join -t $'\t' -1 1 -2 1 $in_both $f1_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_paired1.fastq &
109
+ join -t $'\t' -1 1 -2 1 $in_both $f2_tmp.sorted |cut -f 2,3,4,5| tr "\t" "\n" > ${base_name}_paired2.fastq &
110
+ wait
111
+
112
+ rm $f1_tmp.names
113
+ rm $f2_tmp.names
114
+
115
+ rm $f1_tmp.sorted
116
+ rm $f2_tmp.sorted
117
+
118
+ rm $only_in_2
119
+ rm $only_in_1
120
+ rm $in_both
121
+
122
+ rm $common_names
data/bin/seqtrimnext CHANGED
@@ -57,6 +57,7 @@
57
57
  # $: << File.expand_path(ROOT_PATH)
58
58
 
59
59
  $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
60
+ $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib/')
60
61
 
61
62
  require 'seqtrimnext'
62
63
 
@@ -272,7 +273,7 @@ optparse = OptionParser.new do |opts|
272
273
  end
273
274
 
274
275
  options[:skip_report] = false
275
- opts.on( '-R', '--no-report', 'Change to no verbose mode. Every sequence will not be written to output log' ) do
276
+ opts.on( '-R', '--no-report', 'Do not generate final PDF report (gem scbi_seqtrimnext_report required if you want to generate PDF report).' ) do
276
277
  options[:skip_report] = true
277
278
  end
278
279
 
@@ -18,29 +18,21 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
18
18
  @@params= params
19
19
  @@exit = false
20
20
 
21
+ @@ongoing_stats={}
22
+ @@ongoing_stats[:sequence_count] = 0
23
+ @@ongoing_stats[:smallest_sequence_size] = 900000000000000
24
+ @@ongoing_stats[:biggest_sequence_size] = 0
25
+
21
26
  @@skip_output=skip_output
22
27
 
23
28
  @@chunk_size = chunk_size
24
29
 
25
-
26
- # puts "CHECKPOINT: #{self.checkpoint}\n"*20
27
-
28
- checkpoint_exists=File.exists?('scbi_drb_checkpoint')
30
+ checkpoint_exists=File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
29
31
 
30
32
  # @@use_qual = !qual_path.nil? and File.exists?(qual_path)
31
33
  @@open_mode='w'
32
34
  if checkpoint_exists
33
35
  @@open_mode = 'a'
34
- if File.exists?(STATS_PATH)
35
- # load stats
36
- text = File.read(STATS_PATH)
37
-
38
- # wipe text
39
- # text=text.grep(/^\s*[^#]/).to_s
40
-
41
- # decode json
42
- @@full_stats = JSON.parse(text)
43
- end
44
36
  end
45
37
 
46
38
  #open input file
@@ -91,67 +83,38 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
91
83
 
92
84
  puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)
93
85
 
86
+ # create stats file
94
87
  f = File.open(STATS_PATH,'w')
95
88
  f.puts JSON.pretty_generate(@@full_stats)
96
89
  f.close
97
90
 
98
- r=File.read(STATS_PATH)
91
+ # if the initial stats file doesn't exist, create it
92
+ if !File.exists?(File.join(OUTPUT_PATH,'initial_stats.json'))
93
+ File.open(File.join(OUTPUT_PATH,'initial_stats.json'),'w') do |f|
94
+ f.puts JSON.pretty_generate(@@ongoing_stats)
95
+ end
96
+ end
97
+
99
98
 
99
+ # load stats
100
+ r=File.read(STATS_PATH)
100
101
  stats=JSON::parse(r)
101
102
 
103
+
102
104
 
105
+ # make graphs
103
106
  gs=GraphStats.new(stats)
104
107
 
105
- #gs=GraphStats.new(@@full_stats)
106
-
107
-
108
108
  #close all files
109
-
110
- # @@fqr.close
111
109
  if @@use_json
112
110
  @@json_output.close
113
111
  end
114
112
  @@errors_file.close
115
- # @@rejected_output_file.close
116
-
117
- # @@paired_output_files.each do |k,file|
118
- # file.close
119
- # end
120
113
 
121
114
  @@files.each do |k,file|
122
115
  file.close
123
116
  end
124
117
 
125
- # @@paired_qual_output_files.each do |k,file|
126
- # file.close
127
- # end
128
-
129
- # @@sequences_output_files.each do |k,file|
130
- # file.close
131
- # end
132
- #
133
- # @@low_complexity_output_files.each do |k,file|
134
- # file.close
135
- # end
136
- #
137
- # @@sffinfo_files.each do |k,file|
138
- # file.close
139
- # end
140
- #
141
- # @@low_sffinfo_files.each do |k,file|
142
- # file.close
143
- # end
144
-
145
-
146
- # @@qual_output_files.each do |k,file|
147
- # file.close
148
- # end
149
-
150
- # more than one MID found
151
- # if @@full_stats['mid_id'] && @@full_stats['mid_id'].count>1
152
- #
153
- # end
154
-
155
118
  if File.exists?('scbi_drb_checkpoint')
156
119
  File.delete('scbi_drb_checkpoint')
157
120
  end
@@ -172,21 +135,71 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
172
135
  end
173
136
 
174
137
  def load_user_checkpoint(checkpoint)
138
+ # load full_stats from file !!!!!!!!!!!!!
175
139
 
176
- # reset count stats since they are repeated by checkpointing
140
+ if File.exists?(STATS_PATH)
177
141
 
178
- if @@full_stats['sequences'] && @@full_stats['repeated']
179
- @@full_stats['sequences']['count']['repeated']=0
180
- end
181
-
182
- if @@full_stats['sequences'] && @@full_stats['processed']
183
- @@full_stats['sequences']['processed']['count']=0
142
+ # load stats
143
+ text = File.read(STATS_PATH)
144
+
145
+ # wipe text
146
+ # text=text.grep(/^\s*[^#]/).to_s
147
+
148
+ # decode json
149
+ @@full_stats = JSON.parse(text)
184
150
  end
185
-
186
- if @@full_stats['sequences'] && @@full_stats['total']
187
- @@full_stats['sequences']['total']['count']=0
151
+
152
+ # reset count stats since they are repeated by checkpointing
153
+
154
+ # {
155
+ # "sequences": {
156
+ # "count": {
157
+ # "input_count": 1600,
158
+ # "output_seqs": 933,
159
+ # "rejected": 67
160
+ # },
161
+ # "rejected": {
162
+ # "short insert": 39,
163
+ # "contaminated": 26,
164
+ # "unexpected vector": 2
165
+ # }
166
+ # }
167
+ # }
168
+
169
+ if @@full_stats['sequences']
170
+ if @@full_stats['sequences']['count']
171
+ # set input count to 0
172
+ @@full_stats['sequences']['count']['input_count']=0
173
+
174
+ # do not remove outputseqs
175
+ # @@full_stats['sequences']['count']['output_seqs']=0
176
+ end
177
+
178
+ # remove rejected due to repetitions from rejected count
179
+ if @@full_stats['sequences']['rejected']
180
+
181
+ # if there are repeated sequences
182
+ if (@@full_stats['sequences']['rejected']['repeated'])
183
+
184
+ # if the repeated count is > 0 and the 'count' section exists
185
+ if (@@full_stats['sequences']['rejected']['repeated'] > 0) and @@full_stats['sequences']['count']
186
+
187
+ # subtract repeated from rejected, since they are going to be added again by the checkpoint process
188
+ @@full_stats['sequences']['count']['rejected'] -= @@full_stats['sequences']['rejected']['repeated']
189
+ end
190
+
191
+ # set repeated to 0
192
+ @@full_stats['sequences']['rejected']['repeated']=0
193
+ end
194
+ end
188
195
  end
189
196
 
197
+
198
+ # puts "Loaded Stats"
199
+ # puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)
200
+
201
+ # TODO - remove sequences from rejected file that were added by cloned
202
+
190
203
  super
191
204
  # return checkpoint
192
205
  end
@@ -202,17 +215,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
202
215
 
203
216
  # read a work that will not be processed, only to skip until checkpoint
204
217
  def trash_checkpointed_work
205
-
206
- @@chunk_size.times do
207
- begin
208
- n,f,q,c = @@fqr.next_seq
209
- end while (!n.nil? && @@params.repeated_seq?(n))
210
-
211
- if n.nil?
212
- break
213
- end
214
- end
215
-
218
+ warn "Deprecated: trash_checkpointed_work was deprecated, it is automatic now"
216
219
  end
217
220
 
218
221
  def next_work
@@ -228,12 +231,16 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
228
231
  if !n.nil? && @@params.repeated_seq?(n)
229
232
  @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
230
233
  @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
231
-
234
+
232
235
  get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
233
-
236
+
234
237
  end
235
-
238
+
236
239
  if !n.nil?
240
+ @@ongoing_stats[:sequence_count] += 1
241
+ @@ongoing_stats[:smallest_sequence_size] = [f.size, @@ongoing_stats[:smallest_sequence_size]].min
242
+ @@ongoing_stats[:biggest_sequence_size] = [f.size, @@ongoing_stats[:biggest_sequence_size]].max # compare against the running maximum, not the minimum
243
+
237
244
  @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
238
245
  end
239
246
  end while (!n.nil? && @@params.repeated_seq?(n))
@@ -247,50 +254,9 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
247
254
 
248
255
  end
249
256
 
250
- # def next_work
251
- #
252
- # if @@exit
253
- # return nil
254
- # end
255
- # group = SequenceGroup.new
256
- #
257
- # @@chunk_size.times do
258
- # begin
259
- #
260
- # n,f,q,c = @@fqr.next_seq
261
- #
262
- # if !n.nil? && @@params.repeated_seq?(n)
263
- # @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
264
- # @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
265
- #
266
- # get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
267
- #
268
- # end
269
- # if !n.nil?
270
- # @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
271
- # end
272
- # end while (!n.nil? && @@params.repeated_seq?(n))
273
- #
274
- # if !n.nil?
275
- # # @@full_stats.add_stats({'sequences' => {'count' => {'processed' => 1}}})
276
- # group.push SequenceWithAction.new(n,f.upcase,q,c)
277
- # else
278
- # break
279
- # end
280
- # end
281
- #
282
- # # puts "Processing #{group.inspect}"
283
- #
284
- # if group.empty?
285
- # return nil
286
- # else
287
- # return group
288
- # end
289
- #
290
- # end
291
257
 
292
258
  def work_received(obj)
293
-
259
+
294
260
  res = obj
295
261
 
296
262
  # collect stats
@@ -19,6 +19,9 @@
19
19
  #
20
20
  # $: << File.expand_path(ROOT_PATH)
21
21
 
22
+ $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
23
+ $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
24
+
22
25
  require 'seqtrimnext'
23
26
 
24
27
  $SEQTRIM_PATH = ROOT_PATH
@@ -37,7 +40,7 @@ ENV['BLASTDB']=$FORMATTED_DB_PATH
37
40
  OUTPUT_PATH='output_files'
38
41
 
39
42
  puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}"
40
- # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
43
+
41
44
 
42
45
  require 'scbi_mapreduce'
43
46
  require 'params'
@@ -10,8 +10,13 @@ class GraphStats
10
10
  init_stats=initial_stats
11
11
 
12
12
  if init_stats.nil?
13
- r=File.read(File.join(OUTPUT_PATH,'initial_stats.json'))
14
- init_stats= JSON::parse(r)
13
+ if File.exists?(File.join(OUTPUT_PATH,'initial_stats.json'))
14
+ r=File.read(File.join(OUTPUT_PATH,'initial_stats.json'))
15
+ init_stats= JSON::parse(r)
16
+ else
17
+ init_stats=[]
18
+ end
19
+
15
20
  end
16
21
  # puts init_stats.to_json
17
22
  #r=File.read(File.join(File.dirname(__FILE__),'stats.json'))
@@ -5,8 +5,6 @@
5
5
 
6
6
  require 'extract_stats'
7
7
 
8
- # $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib')
9
-
10
8
  require 'scbi_mapreduce'
11
9
  require 'seqtrim_work_manager'
12
10
  require 'action_manager'
@@ -340,6 +338,9 @@ class Seqtrim
340
338
  # server = ScbiMapreduce::Manager.new(ip,port, workers, SeqtrimWorkManager,custom_worker_file, STDOUT,'~/.seqtrimnext')
341
339
  server = ScbiMapreduce::Manager.new(ip,port, workers, SeqtrimWorkManager,custom_worker_file, STDOUT,$SEQTRIMNEXT_INIT)
342
340
  server.chunk_size=chunk_size
341
+ server.checkpointing=true
342
+ server.keep_order=true
343
+ server.retry_stuck_jobs=true
343
344
  server.start_server
344
345
 
345
346
  # close sequence reader
@@ -77,7 +77,7 @@ class SequenceWithAction < Sequence
77
77
 
78
78
  # Adds a new action to the sequence
79
79
  def add_action(a)
80
- $LOG.info("Adding action #{a.type} to #{seq_name}")
80
+ $LOG.debug("Adding action #{a.type} to #{seq_name}")
81
81
 
82
82
  @actions.push a
83
83
 
@@ -25,7 +25,7 @@ class PluginAbAdapters < Plugin
25
25
  def do_blasts(seqs)
26
26
  # find MIDS with less results than max_target_seqs value
27
27
  blast=BatchBlast.new("-db #{@params.get_param('adapters_ab_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
28
- $LOG.info('BLAST:'+blast.get_blast_cmd)
28
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
29
29
 
30
30
  fastas=[]
31
31
 
@@ -61,7 +61,7 @@ class PluginAbAdapters < Plugin
61
61
  raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
62
62
  end
63
63
 
64
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
64
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
65
65
 
66
66
 
67
67
  # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters_ab.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_ab')} -perc_identity #{@params.get_param('blast_percent_ab')} -word_size #{MIN_ADAPTER_SIZE}")
@@ -25,7 +25,7 @@ class PluginAdapters < Plugin
25
25
  def do_blasts(seqs)
26
26
  # find MIDS with less results than max_target_seqs value
27
27
  blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
28
- $LOG.info('BLAST:'+blast.get_blast_cmd)
28
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
29
29
 
30
30
  fastas=[]
31
31
 
@@ -64,7 +64,7 @@ class PluginAdapters < Plugin
64
64
  # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
65
65
  end
66
66
 
67
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
67
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
68
68
 
69
69
 
70
70
  # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')} -word_size #{MIN_ADAPTER_SIZE}")
@@ -0,0 +1,165 @@
1
+ require "plugin"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines the main methods that are necessary to execute PluginAdapters
7
+ # Inherit: Plugin
8
+ ########################################################
9
+
10
+ class PluginAdaptersOld < Plugin
11
+
12
+ def get_type_adapter(p_start,p_end,seq)
13
+ #if q_beg is nearer the left, add adapter action by the left,
14
+ #if q_end is nearer the right, add adapter action by the right
15
+ #NOTE: If the adapter is very near both the left and the right,
16
+ #then the sequence isn't valid, because almost the whole sequence is adapter.
17
+
18
+
19
+ v1= p_end.to_i
20
+ v2= p_start.to_i
21
+
22
+ # puts " startadapter #{v2} endadapter #{v1} insert_start #{seq.insert_start} insert_end #{seq.insert_end}"
23
+
24
+ # puts " #{v2+seq.insert_start} <? #{seq.seq_fasta.length - v1 - 1 + seq.seq_fasta_orig.length - seq.insert_end-1}"
25
+ if (v2+seq.insert_start < (seq.seq_fasta.length - v1 - 1+ seq.seq_fasta_orig.length - seq.insert_end-1)) #IF THE NEAREST ONE IS THE LEFT
26
+ type = "ActionLeftAdapter"
27
+
28
+ else
29
+ type = "ActionRightAdapter"
30
+
31
+ end
32
+ return type
33
+ end
34
+
35
+
36
+ def cut_by_right(adapter,seq)
37
+
38
+ left_size = adapter.q_beg-seq.insert_start+1
39
+ right_size = seq.insert_end-adapter.q_end+1
40
+ left_size=0 if (left_size<0)
41
+ right_size=0 if (right_size<0)
42
+
43
+ return (left_size>(right_size/2).to_i)
44
+
45
+ end
46
+
47
+ #Begins the plugin's execution: blasts all sequences in "seqs" and processes each one with its blast query result
48
+ def execute(seqs)
49
+ blasts= do_blasts(seqs)
50
+
51
+ seqs.each_with_index do |s,i|
52
+ exec_seq(s,blasts.querys[i])
53
+ end
54
+ end
55
+
56
+ def do_blasts(seqs)
57
+ # find MIDS with less results than max_target_seqs value
58
+ blast=BatchBlast.new("-db #{@params.get_param('adapters_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
59
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
60
+
61
+ fastas=[]
62
+
63
+ seqs.each do |seq|
64
+ fastas.push ">"+seq.seq_name
65
+ fastas.push seq.seq_fasta
66
+ end
67
+
68
+ # fastas=fastas.join("\n")
69
+
70
+ blast_table_results = blast.do_blast(fastas)
71
+
72
+ # puts blast_table_results.inspect
73
+
74
+ return blast_table_results
75
+ end
76
+
77
+
78
+ def exec_seq(seq,blast_query)
79
+ if blast_query.query_id != seq.seq_name
80
+ raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
81
+ end
82
+
83
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for adapters into the sequence"
84
+
85
+
86
+ # blast=BatchBlast.new("-db #{File.join($FORMATTED_DB_PATH,'adapters.fasta')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_adapters')} -perc_identity #{@params.get_param('blast_percent_adapters')}")
87
+
88
+ # blast with only one sequence, no with many sequences from a database
89
+ #---------------------------------------------------------------------
90
+
91
+ # blast_table_results = blast.do_blast(seq.seq_fasta) #rise seq to adapterss executing over blast
92
+
93
+ #blast_table_results = BlastTableResult.new(res)
94
+
95
+ # blast_table_results.inspect
96
+
97
+ adapters=[]
98
+ # blast_table_results.querys.each do |query| # first round to save adapters without overlap
99
+ merge_hits(blast_query,adapters)
100
+ # end
101
+
102
+ begin
103
+ adapters2=adapters # second round to save adapters without overlap
104
+ adapters = []
105
+ merge_hits(adapters2,adapters)
106
+ end until (adapters2.count == adapters.count)
107
+
108
+ actions=[]
109
+ adapter_size=0
110
+ # @stats['adapter_size']={}
111
+ adapters.each do |ad| # adds the correspondent action to the sequence
112
+
113
+ type = get_type_adapter(ad.q_beg,ad.q_end,seq)
114
+ a = seq.new_action(ad.q_beg,ad.q_end,type)
115
+ # puts " state left_action #{a.left_action} right_action #{a.right_action}"
116
+
117
+
118
+ adapter_size=ad.q_end-ad.q_beg+1
119
+
120
+ if cut_by_right(ad,seq)
121
+
122
+ # puts "action right end1 #{seq.insert_end}"
123
+
124
+ a.right_action=true #mark rigth action to get the left insert
125
+ else
126
+
127
+ # puts " cut1 by left #{seq.insert_start} ad #{ad.q_beg+seq.insert_start} #{ad.q_end+seq.insert_start}"
128
+
129
+ a.left_action = true #mark left action to get the right insert
130
+
131
+ end
132
+
133
+ a.message = ad.subject_id
134
+ a.reversed = ad.reversed
135
+ actions.push a
136
+
137
+ # @stats[:adapter_size]={adapter_size => 1}
138
+ add_stats('adapter_size',adapter_size)
139
+
140
+ end
141
+ seq.add_actions(actions)
142
+ #
143
+ end
144
+
145
+ #Returns an array with the errors due to parameters are missing
146
+ def self.check_params(params)
147
+ errors=[]
148
+
149
+ comment='Blast E-value used as cut-off when searching for adapters or primers'
150
+ default_value = 1e-6
151
+ params.check_param(errors,'blast_evalue_adapters','Float',default_value,comment)
152
+
153
+ comment='Minimum required identity (%) for a reliable adapter'
154
+ default_value = 95
155
+ params.check_param(errors,'blast_percent_adapters','Integer',default_value,comment)
156
+
157
+ comment='Path for adapter database'
158
+ default_value = File.join($FORMATTED_DB_PATH,'adapters.fasta')
159
+ params.check_param(errors,'adapters_db','DB',default_value,comment)
160
+
161
+ return errors
162
+ end
163
+
164
+
165
+ end
@@ -25,7 +25,7 @@ class PluginAmplicons < Plugin
25
25
  def do_blasts(seqs)
26
26
  # find MIDS with less results than max_target_seqs value
27
27
  blast=BatchBlast.new("-db #{@params.get_param('primers_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_primers')}")
28
- $LOG.info('BLAST:'+blast.get_blast_cmd)
28
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
29
29
 
30
30
  fastas=[]
31
31
 
@@ -49,7 +49,7 @@ class PluginAmplicons < Plugin
49
49
  raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
50
50
  end
51
51
 
52
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"
52
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for primers into the sequence"
53
53
 
54
54
  # puts blast_query.inspect
55
55
 
@@ -36,7 +36,7 @@ class PluginContaminants < Plugin
36
36
 
37
37
  blast = BatchBlast.new("-db #{@params.get_param('contaminants_db')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_contaminants')} -perc_identity #{@params.get_param('blast_percent_contaminants')} -culling_limit 1") #get contaminants -max_target_seqs #{MAX_TARGETS_SEQS}
38
38
 
39
- $LOG.info('BLAST:'+blast.get_blast_cmd(:xml))
39
+ $LOG.debug('BLAST:'+blast.get_blast_cmd(:xml))
40
40
 
41
41
  fastas=[]
42
42
 
@@ -67,7 +67,7 @@ class PluginContaminants < Plugin
67
67
  # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
68
68
  end
69
69
 
70
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
70
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for contaminants into the sequence"
71
71
 
72
72
 
73
73
  #blast = BatchBlast.new('-db DB/formatted/contaminants.fasta','blastn',' -task blastn -evalue 1e-10 -perc_identity 95') #get contaminants
@@ -143,7 +143,7 @@ class PluginContaminants < Plugin
143
143
 
144
144
  end
145
145
  else
146
- $LOG.info('Contaminant ignored due to genus match: '+c.definition)
146
+ $LOG.debug('Contaminant ignored due to genus match: '+c.definition)
147
147
  end
148
148
  end
149
149
 
@@ -286,7 +286,7 @@ class PluginExtractInserts < Plugin
286
286
 
287
287
 
288
288
  def exec_seq(seq)
289
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
289
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: extract inserts"
290
290
 
291
291
  # puts "INSERTO ANTES LINKER INSERT:"+seq.seq_fasta
292
292
 
@@ -319,7 +319,7 @@ class PluginFindPolyAt < Plugin
319
319
 
320
320
 
321
321
  def exec_seq(seq)
322
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
322
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for strings of polyAT's into the sequence with a length indicated by the param <poly_at_length>"
323
323
 
324
324
  find_polyT(seq)
325
325
  find_polyA(seq)
@@ -21,7 +21,7 @@ def execute(seqs)
21
21
 
22
22
 
23
23
  def exec_seq(seq)
24
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
24
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: searching sequence repeated at input file"
25
25
 
26
26
  fasta_input=@params.get_param('truncated_input_file')
27
27
 
@@ -149,7 +149,7 @@ class PluginIndeterminations < Plugin
149
149
 
150
150
 
151
151
  def exec_seq(seq)
152
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
152
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing indeterminations N+"
153
153
 
154
154
  actions=[]
155
155
 
@@ -21,7 +21,7 @@ class PluginKey < Plugin
21
21
 
22
22
 
23
23
  def exec_seq(seq)
24
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
24
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: marking key into the sequence"
25
25
  # blast_table_results.inspect
26
26
 
27
27
  actions=[]
@@ -83,7 +83,7 @@ class PluginLinker < Plugin
83
83
  # find MIDS with less results than max_target_seqs value
84
84
  blast = BatchBlast.new("-db #{@params.get_param('linkers_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
85
85
 
86
- $LOG.info('BLAST:'+blast.get_blast_cmd)
86
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
87
87
 
88
88
  fastas=[]
89
89
 
@@ -106,7 +106,7 @@ class PluginLinker < Plugin
106
106
  if blast_query.query_id != seq.seq_name
107
107
  raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
108
108
  end
109
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"
109
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for linker into the sequence"
110
110
 
111
111
  # key_beg,key_end=search_key(seq,0,3) if false
112
112
  # blast = BatchBlast.new("-subject #{File.join($FORMATTED_DB_PATH,'linkers.fasta')}",'blastn'," -task blastn -evalue #{@params.get_param('blast_evalue_linkers')} -perc_identity #{@params.get_param('blast_percent_linkers')}") #get linkers
@@ -22,7 +22,7 @@ class PluginLowHighSize < Plugin
22
22
 
23
23
 
24
24
  def exec_seq(seq)
25
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
25
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low or high size of the sequence"
26
26
 
27
27
  min_size = @params.get_param('min_sequence_size_raw').to_i #min_size is: mean - 2dev
28
28
  max_size = @params.get_param('max_sequence_size_raw').to_i #max_size is: mean + 2dev
@@ -278,7 +278,7 @@ class PluginLowQuality < Plugin
278
278
  if ((self.class.to_s=='PluginLowQuality') && seq.seq_qual.nil? )
279
279
  $LOG.error " Quality File haven't been provided. It's impossible to execute " + self.class.to_s
280
280
  elsif (seq.seq_qual.size>0)
281
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
281
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking low quality of the sequence"
282
282
 
283
283
  @low=@params.get_param('min_quality').to_i
284
284
 
@@ -29,7 +29,7 @@ class PluginMids < Plugin
29
29
  def do_blasts(seqs)
30
30
  # find MIDS with less results than max_target_seqs value
31
31
  blast = BatchBlast.new("-db #{@params.get_param('mids_db')}",'blastn'," -task blastn-short -perc_identity #{@params.get_param('blast_percent_mids')} -max_target_seqs 4 ") #get mids
32
- $LOG.info('BLAST:'+blast.get_blast_cmd)
32
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
33
33
 
34
34
  fastas=[]
35
35
 
@@ -54,7 +54,7 @@ class PluginMids < Plugin
54
54
  end
55
55
 
56
56
 
57
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for mids into the sequence"
57
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for mids into the sequence"
58
58
 
59
59
 
60
60
  # blast_table_results = blast.do_blast(seq.seq_fasta[0..SIZE_SEARCH_MID]) # execute blast to find mids
@@ -27,7 +27,7 @@ class PluginRemAditArtifacts < Plugin
27
27
 
28
28
  def exec_seq(seq)
29
29
 
30
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
30
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: removing artifacts into the sequence"
31
31
  seq2 = seq.seq_fasta
32
32
  first = 0
33
33
  last = seq2.size-1
@@ -96,7 +96,7 @@ class PluginShortInsert < Plugin
96
96
 
97
97
  def exec_seq(seq)
98
98
 
99
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
99
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
100
100
  # puts "inserto #{seq.insert_start}, #{seq.insert_end} size #{seq.seq_fasta.size}"
101
101
 
102
102
  if (seq.seq_fasta.size > 0)
@@ -183,7 +183,7 @@ class PluginShortInsert < Plugin
183
183
 
184
184
  #Begins the plugin1's execution to warn if the inserted is so short
185
185
  def execute_no_cut_quality(seq)
186
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
186
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: checking if insert of sequence has enought size"
187
187
 
188
188
 
189
189
 
@@ -38,7 +38,7 @@ class PluginVectors < Plugin
38
38
  # find MIDS with less results than max_target_seqs value
39
39
  blast = BatchBlast.new("-db #{@params.get_param('vectors_db')}",'blastn'," -task blastn-short -evalue #{@params.get_param('blast_evalue_vectors')} -perc_identity #{@params.get_param('blast_percent_vectors')} -culling_limit 1") #get vectors
40
40
 
41
- $LOG.info('BLAST:'+blast.get_blast_cmd)
41
+ $LOG.debug('BLAST:'+blast.get_blast_cmd)
42
42
 
43
43
  fastas=[]
44
44
 
@@ -62,7 +62,7 @@ class PluginVectors < Plugin
62
62
  # raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
63
63
  end
64
64
 
65
- $LOG.info "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
65
+ $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence "
66
66
 
67
67
  #blast contra contaminantes
68
68
 
@@ -0,0 +1,5 @@
1
+ # ======================================
2
+ # General parameters GENOMICS illumina
3
+ # ======================================
4
+
5
+ plugin_list = PluginLowHighSize,PluginIndeterminations,PluginContaminants,PluginLowQuality
@@ -0,0 +1,8 @@
1
+ # ======================================
2
+ # General parameters
3
+ # ======================================
4
+
5
+
6
+ plugin_list = PluginLowHighSize,PluginIndeterminations,PluginFindPolyAt,PluginContaminants,PluginLowQuality,PluginLowComplexity
7
+
8
+ contaminants_db="contaminants.fasta cont_ribosome.fasta"
@@ -15,8 +15,9 @@ def add_stats(h_stats)
15
15
  add_stats.each do |property,hash_value|
16
16
  h[plugin_hash][property]={} if h[plugin_hash][property].nil?
17
17
 
18
+ # values need to be in string format because of later loading from json file
18
19
  hash_value.each do |value, count|
19
- h[plugin_hash][property][value]=(h[plugin_hash][property][value]||0) + count
20
+ h[plugin_hash][property][value.to_s]=(h[plugin_hash][property][value.to_s]||0) + count
20
21
  end
21
22
  end
22
23
  end
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.45'
33
+ VERSION = '2.0.46'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.45
5
+ version: 2.0.46
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-03-05 00:00:00 Z
13
+ date: 2012-04-13 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray
@@ -143,9 +143,11 @@ executables:
143
143
  - extract_seqs_from_fastq.rb
144
144
  - fasta2fastq.rb
145
145
  - fastq2fasta.rb
146
+ - filter_database.rb
146
147
  - gen_qual.rb
147
148
  - get_seq.rb
148
149
  - group_by_range.rb
150
+ - join_big_illumina_paired.sh
149
151
  - join_ilumina_paired.rb
150
152
  - parse_amplicons.rb
151
153
  - parse_json_results.rb
@@ -160,14 +162,16 @@ executables:
160
162
  extensions: []
161
163
 
162
164
  extra_rdoc_files:
165
+ - History.txt
163
166
  - lib/seqtrimnext/templates/amplicons.txt
164
167
  - lib/seqtrimnext/templates/genomics_454.txt
165
168
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
169
+ - lib/seqtrimnext/templates/genomics_illumina.txt
166
170
  - lib/seqtrimnext/templates/low_quality.txt
167
171
  - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
168
172
  - lib/seqtrimnext/templates/transcriptomics_454.txt
173
+ - lib/seqtrimnext/templates/transcriptomics_illumina.txt
169
174
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
170
- - History.txt
171
175
  - Manifest.txt
172
176
  - PostInstall.txt
173
177
  files:
@@ -177,9 +181,11 @@ files:
177
181
  - bin/extract_seqs_from_fastq.rb
178
182
  - bin/fasta2fastq.rb
179
183
  - bin/fastq2fasta.rb
184
+ - bin/filter_database.rb
180
185
  - bin/gen_qual.rb
181
186
  - bin/get_seq.rb
182
187
  - bin/group_by_range.rb
188
+ - bin/join_big_illumina_paired.sh
183
189
  - bin/join_ilumina_paired.rb
184
190
  - bin/parse_amplicons.rb
185
191
  - bin/parse_json_results.rb
@@ -191,6 +197,7 @@ files:
191
197
  - bin/split_fastq.rb
192
198
  - bin/split_ilumina_paired.rb
193
199
  - bin/split_paired.rb
200
+ - History.txt
194
201
  - lib/seqtrimnext/actions/action_ab_adapter.rb
195
202
  - lib/seqtrimnext/actions/action_ab_far_adapter.rb
196
203
  - lib/seqtrimnext/actions/action_ab_left_adapter.rb
@@ -208,13 +215,13 @@ files:
208
215
  - lib/seqtrimnext/actions/action_low_high_size.rb
209
216
  - lib/seqtrimnext/actions/action_low_quality.rb
210
217
  - lib/seqtrimnext/actions/action_mid.rb
218
+ - lib/seqtrimnext/actions/action_middle_adapter.rb
211
219
  - lib/seqtrimnext/actions/action_multiple_linker.rb
212
220
  - lib/seqtrimnext/actions/action_paired_reads.rb
213
221
  - lib/seqtrimnext/actions/action_poly_a.rb
214
222
  - lib/seqtrimnext/actions/action_poly_t.rb
215
223
  - lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
216
224
  - lib/seqtrimnext/actions/action_right_adapter.rb
217
- - lib/seqtrimnext/actions/action_middle_adapter.rb
218
225
  - lib/seqtrimnext/actions/action_right_primer.rb
219
226
  - lib/seqtrimnext/actions/action_short_insert.rb
220
227
  - lib/seqtrimnext/actions/action_unexpected_poly_t.rb
@@ -244,6 +251,7 @@ files:
244
251
  - lib/seqtrimnext/plugins/plugin.rb
245
252
  - lib/seqtrimnext/plugins/plugin_ab_adapters.rb
246
253
  - lib/seqtrimnext/plugins/plugin_adapters.rb
254
+ - lib/seqtrimnext/plugins/plugin_adapters_old.rb
247
255
  - lib/seqtrimnext/plugins/plugin_amplicons.rb
248
256
  - lib/seqtrimnext/plugins/plugin_contaminants.rb
249
257
  - lib/seqtrimnext/plugins/plugin_extract_inserts.rb
@@ -262,9 +270,11 @@ files:
262
270
  - lib/seqtrimnext/templates/amplicons.txt
263
271
  - lib/seqtrimnext/templates/genomics_454.txt
264
272
  - lib/seqtrimnext/templates/genomics_454_with_paired.txt
273
+ - lib/seqtrimnext/templates/genomics_illumina.txt
265
274
  - lib/seqtrimnext/templates/low_quality.txt
266
275
  - lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
267
276
  - lib/seqtrimnext/templates/transcriptomics_454.txt
277
+ - lib/seqtrimnext/templates/transcriptomics_illumina.txt
268
278
  - lib/seqtrimnext/templates/transcriptomics_plants.txt
269
279
  - lib/seqtrimnext/utils/extract_samples.rb
270
280
  - lib/seqtrimnext/utils/fasta2xml.rb
@@ -276,7 +286,6 @@ files:
276
286
  - lib/seqtrimnext/utils/recover_mid.rb
277
287
  - lib/seqtrimnext/utils/string_utils.rb
278
288
  - lib/seqtrimnext.rb
279
- - History.txt
280
289
  - Manifest.txt
281
290
  - PostInstall.txt
282
291
  - Rakefile