seqtrimnext 2.0.51 → 2.0.52

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -2,25 +2,25 @@
2
2
  #finds the classes that were in the folder 'classes'
3
3
 
4
4
  # ROOT_PATH=File.dirname(File.dirname(File.dirname(__FILE__)))
5
- #
5
+ #
6
6
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
7
7
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes','blast'))
8
- #
8
+ #
9
9
  # #finds the classes that were in the folder 'plugins'
10
10
  # $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
11
- #
11
+ #
12
12
  # #finds the classes that were in the folder 'plugins'
13
13
  # $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
14
- #
14
+ #
15
15
  # #finds the classes that were in the folder 'utils'
16
16
  # $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
17
- #
17
+ #
18
18
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
19
- #
19
+ #
20
20
  # $: << File.expand_path(ROOT_PATH)
21
21
 
22
22
  $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
23
- $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
23
+ # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
24
24
 
25
25
  require 'seqtrimnext'
26
26
 
@@ -32,7 +32,7 @@ if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB'])
32
32
  $DB_PATH = File.dirname($FORMATTED_DB_PATH)
33
33
  else
34
34
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted'))
35
- $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
35
+ $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
36
36
  end
37
37
 
38
38
  ENV['BLASTDB']=$FORMATTED_DB_PATH
@@ -47,254 +47,389 @@ require 'params'
47
47
  require 'action_manager'
48
48
  require 'plugin_manager'
49
49
  # require 'sequence_with_action'
50
- #
50
+ #
51
51
  require 'scbi_fastq'
52
52
  require 'sequence_group'
53
53
 
54
54
  class SeqtrimWorker < ScbiMapreduce::Worker
55
55
 
56
- def process_object(obj)
57
- running_seqs=SequenceGroup.new(obj)
58
-
59
- # execute plugins
60
- @plugin_manager.execute_plugins(running_seqs)
61
-
62
- # add output data
63
- add_output_data(running_seqs)
64
-
65
- return running_seqs
66
- end
67
-
68
- def receive_initial_config(obj)
69
-
70
- # Reads the parameters
71
- $WORKER_LOG.info "Params received"
72
- # @params = Params.new(params_path)
73
- @params = obj
74
-
75
- @use_qual=@params.get_param('use_qual')
76
- @use_json=@params.get_param('use_json')
77
- end
56
+ def process_object(obj)
78
57
 
79
- def starting_worker
80
-
81
- # $WORKER_LOG.level = Logger::ERROR
82
- $WORKER_LOG.level = Logger::WARN
83
- $WORKER_LOG.info "Loading actions"
84
-
85
- @action_manager = ActionManager.new
86
-
87
- $WORKER_LOG.info "Loading plugins"
88
- @plugin_list = @params.get_param('plugin_list') # puts in plugin_list the plugins's array
89
- $WORKER_LOG.info "PLUGIN LIST:" + @plugin_list
90
-
91
- @plugin_manager = PluginManager.new(@plugin_list,@params) # creates an instance from PluginManager. This must storage the plugins and load it
92
-
93
- rescue Exception => e
94
- puts (e.message+ e.backtrace.join("\n"))
95
-
96
- end
97
-
98
-
99
- def closing_worker
100
-
101
- end
102
-
103
-
104
- def add_output_data(obj)
105
- obj.output_text=[]
106
-
107
- obj.each do |seq|
108
- obj.output_text << seq.to_text
109
- write_seq_to_files(obj.output_files,seq, obj.stats)
110
- end
111
-
112
- # @remove seqs since they are not needed anymore to write output files
113
- obj.remove_all_seqs
114
- end
115
-
116
- def add_stat(stats,key,subkey,value,count=1)
117
-
118
- stats[key]={} if !stats[key]
119
- stats[key][subkey]={} if !stats[key][subkey]
120
- stats[key][subkey][value]=0 if !stats[key][subkey][value]
121
-
122
- stats[key][subkey][value]+=count
123
- end
58
+ running_seqs=SequenceGroup.new(obj.flatten)
124
59
 
125
- def write_seq_to_files(files,seq, stats)
126
- # puts stats.to_json
127
-
128
- dir_name,file_name=seq.get_file_tag_path
129
- # puts File.join(dir_name,'sequences_'+file_name)
130
-
131
- # get current inserts
132
- inserts = seq.get_inserts
133
-
134
- # qualities are optional
135
- if @use_qual
136
- qual_inserts = seq.get_qual_inserts
137
- end
138
-
139
- # save json if necessary
140
- if @use_json
141
- json_file(files)<< seq.to_json
142
- end
143
-
144
- # find mids
145
- mid = seq.get_actions(ActionMid).first
146
-
147
-
148
- if (seq.seq_rejected) # sequence rejected
149
-
150
- #save to rejected sequences
151
- message = seq.seq_rejected_by_message
152
- rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
153
-
154
- add_stat(stats,'sequences','rejected',seq.seq_rejected_by_message)
155
- add_stat(stats,'sequences','count','rejected')
156
-
157
-
158
- elsif (inserts.empty?) #sequence with no inserts
159
- message = 'No valid inserts found'
160
- rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
161
-
162
- add_stat(stats,'sequences','rejected',message)
163
- add_stat(stats,'sequences','count','rejected')
164
-
165
- elsif (inserts.count == 2) # sequence with two inserts = PAIRED SEQUENCES
166
- add_stat(stats,'sequences','count','output_seqs_paired')
167
-
168
- # TODO - Add this stats to full stats
169
- # @@full_stats.add_stats({'sequences' => {'paired' => {'count' => 1}}})
170
-
171
- if (mid.nil? || (mid.message=='no_MID') ) # without mid
172
- mid_id = 'no_MID'
173
- mid_message = ' No MID found'
174
- else
175
- mid_id = mid.tag_id
176
- mid_message=''
177
- if mid_id != mid_message
178
- mid_message = ' '+mid.message
179
- end
180
- end
181
-
182
- # fasta_file = get_paired_file(mid_id)
183
-
184
- n="#{seq.seq_name}_left"
185
- c="template=#{seq.seq_name} dir=R library=#{mid_id}"
186
- f=inserts[0].reverse.tr('actgACTG','tgacTGAC')
187
- q=[]
188
- if @use_qual
189
- q=qual_inserts[0].reverse
190
- end
191
-
192
- paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
193
-
194
-
195
- n="#{seq.seq_name}_right"
196
- c="template=#{seq.seq_name} dir=F library=#{mid_id}"
197
- f=inserts[1]
198
- q=[]
199
- if @use_qual
200
- q=qual_inserts[1]
201
- end
202
-
203
- paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
204
-
205
-
206
- elsif (inserts.count == 1) # sequence with one insert
207
-
208
- if (mid.nil? || (mid.message=='no_MID') ) # without mid
209
- mid_id = 'no_MID'
210
- mid_message = ' No MID found'
211
- else
212
- mid_id = mid.tag_id
213
- mid_message=''
214
- if mid_id != mid_message
215
- mid_message = ' '+mid.message
216
- end
217
- end
218
-
219
- # save fasta and qual in no MID file
220
- has_low_complexity = seq.get_actions(ActionLowComplexity)
221
-
222
- if has_low_complexity.empty?
223
- add_stat(stats,'sequences','count','output_seqs')
224
-
225
- # fasta_file = get_sequence_file(mid_id)
226
- # sff_file=get_sffinfo_file(mid_id)
227
- fasta_file=sequence_file(files,dir_name,file_name)
228
- sff_file=sffinfo_file(files,dir_name,file_name)
229
- else
230
- add_stat(stats,'sequences','count','output_seqs_low_complexity')
231
-
232
- # fasta_file = get_low_complexity_file(mid_id)
233
- # sff_file=get_low_sffinfo_file(mid_id)
234
- fasta_file=low_complexity_file(files,dir_name,file_name)
235
- sff_file=low_sffinfo_file(files,dir_name,file_name)
60
+ # execute plugins
61
+ @plugin_manager.execute_plugins(running_seqs)
62
+
63
+ # add output data
64
+ add_output_data(running_seqs)
65
+
66
+ return running_seqs
67
+ end
68
+
69
+ def receive_initial_config(obj)
70
+
71
+ # Reads the parameters
72
+ $WORKER_LOG.info "Params received"
73
+ # @params = Params.new(params_path)
74
+ @params = obj
75
+ @tuple_size=@params.get_param('tuple_size')
76
+
77
+ @use_qual=@params.get_param('use_qual')
78
+ @use_json=@params.get_param('use_json')
79
+ end
80
+
81
+ def starting_worker
82
+
83
+ # $WORKER_LOG.level = Logger::ERROR
84
+ $WORKER_LOG.level = Logger::WARN
85
+ $WORKER_LOG.info "Loading actions"
86
+
87
+ @action_manager = ActionManager.new
88
+
89
+ $WORKER_LOG.info "Loading plugins"
90
+ @plugin_list = @params.get_param('plugin_list') # puts in plugin_list the plugins's array
91
+ $WORKER_LOG.info "PLUGIN LIST:" + @plugin_list
92
+
93
+ @plugin_manager = PluginManager.new(@plugin_list,@params) # creates an instance from PluginManager. This must storage the plugins and load it
94
+
95
+ rescue Exception => e
96
+ puts (e.message+ e.backtrace.join("\n"))
97
+
98
+ end
99
+
100
+
101
+ def closing_worker
102
+
103
+ end
104
+
105
+
106
+ def add_output_data(obj)
107
+ obj.output_text=[]
108
+
109
+ if @tuple_size>1
110
+ obj.each_slice(@tuple_size) do |seqs|
111
+
112
+ write_seq_to_files_tuple(obj.output_files,seqs, obj.stats)
113
+
114
+ seqs.each do |seq|
115
+ obj.output_text << seq.to_text
236
116
  end
237
-
238
- q=[]
239
- if @use_qual
240
- q=qual_inserts[0]
241
- end
242
-
243
- n=seq.seq_name
244
- c=mid_message
245
- f=inserts[0]
246
-
247
- fasta_file << FastqFile.to_fastq(n,f,q,c)
248
-
249
- inserts_pos = seq.get_actions(ActionInsert)
250
-
251
- sff_file<< "#{n} #{inserts_pos[0].start_pos+1} #{inserts_pos[0].end_pos+1}"
252
-
253
117
  end
254
-
255
- end
256
-
257
-
258
- # ACCESS TO FILES
259
-
260
- def json_file(files)
261
- return get_file(files,File.join(OUTPUT_PATH,'results.json'))
262
- end
263
-
264
- def rejected_output_file(files)
265
- return get_file(files,File.join(OUTPUT_PATH,'rejected.txt'))
266
- end
267
-
268
-
269
- def sequence_file(files, dir_name, file_name)
270
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'sequences_'+file_name+'.fastq'))
271
- end
272
-
273
- def paired_file(files, dir_name, file_name)
274
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_'+file_name+'.fastq'))
118
+
119
+ else
120
+ obj.each do |seq|
121
+ write_seq_to_files_normal(obj.output_files,seq, obj.stats)
122
+ obj.output_text << seq.to_text
123
+ end
124
+ end
125
+
126
+ # @remove seqs since they are not needed anymore to write output files
127
+ obj.remove_all_seqs
128
+ end
129
+
130
+ def add_stat(stats,key,subkey,value,count=1)
131
+
132
+ stats[key]={} if !stats[key]
133
+ stats[key][subkey]={} if !stats[key][subkey]
134
+ stats[key][subkey][value]=0 if !stats[key][subkey][value]
135
+
136
+ stats[key][subkey][value]+=count
137
+ end
138
+
139
+ def write_seq_to_files_tuple(files,seqs, stats)
140
+
141
+
142
+ seq1=seqs[0]
143
+ seq2=seqs[1]
144
+
145
+ dir_name,file_name,priority=seq1.get_file_tag_path
146
+ dir_name2,file_name2,priority2=seq2.get_file_tag_path
147
+
148
+ # both paired sequences must go in same file, there are priorities
149
+ if (dir_name!=dir_name2) || (file_name!=file_name2)
150
+ if priority2>priority
151
+ dir_name=dir_name2
152
+ file_name=file_name2
153
+ end
275
154
  end
155
+
156
+ # get current inserts
157
+ inserts1 = seq1.get_inserts
158
+ inserts2 = seq2.get_inserts
276
159
 
277
- def low_complexity_file(files, dir_name, file_name)
278
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_'+file_name+'.fastq'))
160
+ # qualities are optional
161
+ if @use_qual
162
+ qual_inserts1 = seq1.get_qual_inserts
163
+ qual_inserts2 = seq2.get_qual_inserts
279
164
  end
280
-
281
- def sffinfo_file(files, dir_name, file_name)
282
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'sff_info_'+file_name+'.txt'))
165
+
166
+
167
+
168
+ # save json if necessary
169
+ if @use_json
170
+ json_file(files)<< seq1.to_json
171
+ json_file(files)<< seq2.to_json
283
172
  end
284
173
 
285
- def low_sffinfo_file(files, dir_name, file_name)
286
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_sff_info_'+file_name+'.txt'))
174
+ # find mids
175
+ mid1 = seq1.get_actions(ActionMid).first
176
+ mid2 = seq2.get_actions(ActionMid).first
177
+
178
+
179
+ if !inserts1.empty? && !inserts2.empty? # both have inserts
180
+ # save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
181
+ save_two_inserts_tuple(files,seq1,seq2, stats,inserts1,inserts2,qual_inserts1,qual_inserts2,mid1,dir_name,file_name)
182
+ else
183
+ save_rejected_empty_or_single(files,seq1, stats,inserts1,qual_inserts1,mid1,dir_name,file_name)
184
+ save_rejected_empty_or_single(files,seq2, stats,inserts2,qual_inserts2,mid2,dir_name,file_name)
185
+ end
186
+
187
+ end
188
+
189
+ def save_two_inserts_tuple(files,seq1,seq2, stats,inserts1,inserts2,qual_inserts1,qual_inserts2,mid,dir_name,file_name)
190
+
191
+ add_stat(stats,'sequences','count','output_seqs_paired')
192
+ add_stat(stats,'sequences','count','output_seqs_paired')
193
+
194
+ mid_id,mid_message=get_mid_message(mid)
195
+
196
+ # save left read
197
+ n="#{seq1.seq_name}"
198
+ c=seq1.get_comment_line # "template=#{seq1.seq_name} dir=R library=#{mid_id}"
199
+ f=inserts1[0]#.reverse.tr('actgACTG','tgacTGAC')
200
+ q=[]
201
+ if @use_qual
202
+ q=qual_inserts1[0] #.reverse
287
203
  end
288
-
289
- def get_file(files,fn)
290
- res=files[fn]
291
-
292
- if !res
293
- files[fn]=[]
294
- res=files[fn]
204
+
205
+ paired_file_ilu1(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
206
+
207
+ # save right read
208
+ n="#{seq2.seq_name}"
209
+ c=seq2.get_comment_line # "template=#{seq2.seq_name} dir=F library=#{mid_id}"
210
+ f=inserts2[0]
211
+ q=[]
212
+ if @use_qual
213
+ q=qual_inserts2[0]
214
+ end
215
+
216
+ paired_file_ilu2(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
217
+
218
+ end
219
+
220
+
221
+ def save_rejected_empty_or_single(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
222
+ if (seq.seq_rejected) # save to rejected sequences
223
+ save_rejected_seq(files,seq, stats)
224
+ elsif (inserts.empty?) #sequence with no inserts
225
+ save_empty_insert(files,seq, stats)
226
+ elsif (inserts.count == 1) # sequence with one insert
227
+ save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
228
+ end
229
+ end
230
+
231
+
232
+ # SAVE NORMAL ===============================
233
+ def save_rejected_seq(files,seq, stats)
234
+ # message = seq.seq_rejected_by_message
235
+ message= seq.get_comment_line
236
+ rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
237
+
238
+ add_stat(stats,'sequences','rejected',seq.seq_rejected_by_message)
239
+ add_stat(stats,'sequences','count','rejected')
240
+ end
241
+
242
+ def save_empty_insert(files,seq, stats)
243
+ seq.seq_rejected=true
244
+ seq.seq_rejected_by_message='short insert'
245
+
246
+ message = 'No valid inserts found'
247
+
248
+ rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
249
+
250
+ add_stat(stats,'sequences','rejected',message)
251
+ add_stat(stats,'sequences','count','rejected')
252
+
253
+ end
254
+
255
+ def get_mid_message(mid)
256
+ if (mid.nil? || (mid.message=='no_MID') ) # without mid
257
+ mid_id = 'no_MID'
258
+ mid_message = ' No MID found'
259
+ else
260
+ mid_id = mid.tag_id
261
+ mid_message=''
262
+ if mid_id != mid_message
263
+ mid_message = ' '+mid.message
295
264
  end
265
+ end
266
+ return mid_id,mid_message
267
+ end
268
+
269
+ def save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
270
+ add_stat(stats,'sequences','count','output_seqs_paired')
271
+
272
+ mid_id,mid_message=get_mid_message(mid)
273
+
274
+ # save left read
275
+ n="#{seq.seq_name}_left"
276
+ c="template=#{seq.seq_name} dir=R library=#{mid_id} #{seq.get_comment_line}"
277
+ f=inserts[0].reverse.tr('actgACTG','tgacTGAC')
278
+ q=[]
279
+ if @use_qual
280
+ q=qual_inserts[0].reverse
281
+ end
282
+
283
+ paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
284
+
285
+ # save right read
286
+ n="#{seq.seq_name}_right"
287
+ c="template=#{seq.seq_name} dir=F library=#{mid_id} #{seq.get_comment_line}"
288
+ f=inserts[1]
289
+ q=[]
290
+ if @use_qual
291
+ q=qual_inserts[1]
292
+ end
293
+
294
+ paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
295
+
296
+ end
297
+
298
+ def save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
299
+ mid_id,mid_message=get_mid_message(mid)
300
+
301
+ # save fasta and qual in no MID file
302
+ has_low_complexity = seq.get_actions(ActionLowComplexity)
303
+
304
+ if has_low_complexity.empty?
305
+ add_stat(stats,'sequences','count','output_seqs')
306
+
307
+ fasta_file=sequence_file(files,dir_name,file_name)
308
+ sff_file=sffinfo_file(files,dir_name,file_name)
309
+ else
310
+ add_stat(stats,'sequences','count','output_seqs_low_complexity')
311
+
312
+ fasta_file=low_complexity_file(files,dir_name,file_name)
313
+ sff_file=low_sffinfo_file(files,dir_name,file_name)
314
+ end
315
+
316
+ q=[]
317
+ if @use_qual
318
+ q=qual_inserts[0]
319
+ end
320
+
321
+ n=seq.seq_name
322
+ c=mid_message
323
+
324
+ seq_comments=seq.get_comment_line
325
+ if !seq_comments.strip.empty?
326
+ c=seq_comments + c
327
+ end
328
+
329
+ f=inserts[0]
330
+
331
+ fasta_file << FastqFile.to_fastq(n,f,q,c)
332
+
333
+ inserts_pos = seq.get_actions(ActionInsert)
334
+
335
+ sff_file<< "#{n} #{inserts_pos[0].start_pos+1} #{inserts_pos[0].end_pos+1}"
336
+
337
+
338
+ end
339
+
340
+
341
+ def write_seq_to_files_normal(files,seq, stats)
342
+
343
+ # puts stats.to_json
344
+
345
+ dir_name,file_name,priority=seq.get_file_tag_path
346
+ # puts File.join(dir_name,'sequences_'+file_name)
347
+
348
+ # get current inserts
349
+ inserts = seq.get_inserts
350
+
351
+ # qualities are optional
352
+ if @use_qual
353
+ qual_inserts = seq.get_qual_inserts
354
+ end
355
+
356
+ # save json if necessary
357
+ if @use_json
358
+ json_file(files)<< seq.to_json
359
+ end
360
+
361
+ # find mids
362
+ mid = seq.get_actions(ActionMid).first
363
+
364
+
365
+ if (seq.seq_rejected) # save to rejected sequences
366
+ save_rejected_seq(files,seq, stats)
296
367
 
297
- return res
368
+ elsif (inserts.empty?) #sequence with no inserts
369
+ save_empty_insert(files,seq, stats)
370
+
371
+ elsif (inserts.count == 2) # sequence with two inserts = PAIRED SEQUENCES
372
+ save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
373
+
374
+ elsif (inserts.count == 1) # sequence with one insert
375
+ save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
376
+ end
377
+
378
+ end
379
+
380
+
381
+
382
+
383
+
384
+ # ACCESS TO FILES
385
+
386
+ def json_file(files)
387
+ return get_file(files,File.join(OUTPUT_PATH,'results.json'))
388
+ end
389
+
390
+ def rejected_output_file(files)
391
+ return get_file(files,File.join(OUTPUT_PATH,'rejected.txt'))
392
+ end
393
+
394
+
395
+ def sequence_file(files, dir_name, file_name)
396
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'sequences_'+file_name+'.fastq'))
397
+ end
398
+
399
+ def paired_file(files, dir_name, file_name)
400
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_'+file_name+'.fastq'))
401
+ end
402
+
403
+ def paired_file_ilu1(files, dir_name, file_name)
404
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_1_'+file_name+'.fastq'))
405
+ end
406
+
407
+ def paired_file_ilu2(files, dir_name, file_name)
408
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_2_'+file_name+'.fastq'))
409
+ end
410
+
411
+
412
+ def low_complexity_file(files, dir_name, file_name)
413
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_'+file_name+'.fastq'))
414
+ end
415
+
416
+ def sffinfo_file(files, dir_name, file_name)
417
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'sff_info_'+file_name+'.txt'))
418
+ end
419
+
420
+ def low_sffinfo_file(files, dir_name, file_name)
421
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_sff_info_'+file_name+'.txt'))
422
+ end
423
+
424
+ def get_file(files,fn)
425
+ res=files[fn]
426
+
427
+ if !res
428
+ files[fn]=[]
429
+ res=files[fn]
298
430
  end
299
-
431
+
432
+ return res
433
+ end
434
+
300
435
  end