seqtrimnext 2.0.51 → 2.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
@@ -2,25 +2,25 @@
2
2
  #finds the classes that were in the folder 'classes'
3
3
 
4
4
  # ROOT_PATH=File.dirname(File.dirname(File.dirname(__FILE__)))
5
- #
5
+ #
6
6
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
7
7
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes','blast'))
8
- #
8
+ #
9
9
  # #finds the classes that were in the folder 'plugins'
10
10
  # $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
11
- #
11
+ #
12
12
  # #finds the classes that were in the folder 'plugins'
13
13
  # $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
14
- #
14
+ #
15
15
  # #finds the classes that were in the folder 'utils'
16
16
  # $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
17
- #
17
+ #
18
18
  # $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
19
- #
19
+ #
20
20
  # $: << File.expand_path(ROOT_PATH)
21
21
 
22
22
  $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
23
- $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
23
+ # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
24
24
 
25
25
  require 'seqtrimnext'
26
26
 
@@ -32,7 +32,7 @@ if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB'])
32
32
  $DB_PATH = File.dirname($FORMATTED_DB_PATH)
33
33
  else
34
34
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted'))
35
- $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
35
+ $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
36
36
  end
37
37
 
38
38
  ENV['BLASTDB']=$FORMATTED_DB_PATH
@@ -47,254 +47,389 @@ require 'params'
47
47
  require 'action_manager'
48
48
  require 'plugin_manager'
49
49
  # require 'sequence_with_action'
50
- #
50
+ #
51
51
  require 'scbi_fastq'
52
52
  require 'sequence_group'
53
53
 
54
54
  class SeqtrimWorker < ScbiMapreduce::Worker
55
55
 
56
- def process_object(obj)
57
- running_seqs=SequenceGroup.new(obj)
58
-
59
- # execute plugins
60
- @plugin_manager.execute_plugins(running_seqs)
61
-
62
- # add output data
63
- add_output_data(running_seqs)
64
-
65
- return running_seqs
66
- end
67
-
68
- def receive_initial_config(obj)
69
-
70
- # Reads the parameters
71
- $WORKER_LOG.info "Params received"
72
- # @params = Params.new(params_path)
73
- @params = obj
74
-
75
- @use_qual=@params.get_param('use_qual')
76
- @use_json=@params.get_param('use_json')
77
- end
56
+ def process_object(obj)
78
57
 
79
- def starting_worker
80
-
81
- # $WORKER_LOG.level = Logger::ERROR
82
- $WORKER_LOG.level = Logger::WARN
83
- $WORKER_LOG.info "Loading actions"
84
-
85
- @action_manager = ActionManager.new
86
-
87
- $WORKER_LOG.info "Loading plugins"
88
- @plugin_list = @params.get_param('plugin_list') # puts in plugin_list the plugins's array
89
- $WORKER_LOG.info "PLUGIN LIST:" + @plugin_list
90
-
91
- @plugin_manager = PluginManager.new(@plugin_list,@params) # creates an instance from PluginManager. This must storage the plugins and load it
92
-
93
- rescue Exception => e
94
- puts (e.message+ e.backtrace.join("\n"))
95
-
96
- end
97
-
98
-
99
- def closing_worker
100
-
101
- end
102
-
103
-
104
- def add_output_data(obj)
105
- obj.output_text=[]
106
-
107
- obj.each do |seq|
108
- obj.output_text << seq.to_text
109
- write_seq_to_files(obj.output_files,seq, obj.stats)
110
- end
111
-
112
- # @remove seqs since they are not needed anymore to write output files
113
- obj.remove_all_seqs
114
- end
115
-
116
- def add_stat(stats,key,subkey,value,count=1)
117
-
118
- stats[key]={} if !stats[key]
119
- stats[key][subkey]={} if !stats[key][subkey]
120
- stats[key][subkey][value]=0 if !stats[key][subkey][value]
121
-
122
- stats[key][subkey][value]+=count
123
- end
58
+ running_seqs=SequenceGroup.new(obj.flatten)
124
59
 
125
- def write_seq_to_files(files,seq, stats)
126
- # puts stats.to_json
127
-
128
- dir_name,file_name=seq.get_file_tag_path
129
- # puts File.join(dir_name,'sequences_'+file_name)
130
-
131
- # get current inserts
132
- inserts = seq.get_inserts
133
-
134
- # qualities are optional
135
- if @use_qual
136
- qual_inserts = seq.get_qual_inserts
137
- end
138
-
139
- # save json if necessary
140
- if @use_json
141
- json_file(files)<< seq.to_json
142
- end
143
-
144
- # find mids
145
- mid = seq.get_actions(ActionMid).first
146
-
147
-
148
- if (seq.seq_rejected) # sequence rejected
149
-
150
- #save to rejected sequences
151
- message = seq.seq_rejected_by_message
152
- rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
153
-
154
- add_stat(stats,'sequences','rejected',seq.seq_rejected_by_message)
155
- add_stat(stats,'sequences','count','rejected')
156
-
157
-
158
- elsif (inserts.empty?) #sequence with no inserts
159
- message = 'No valid inserts found'
160
- rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
161
-
162
- add_stat(stats,'sequences','rejected',message)
163
- add_stat(stats,'sequences','count','rejected')
164
-
165
- elsif (inserts.count == 2) # sequence with two inserts = PAIRED SEQUENCES
166
- add_stat(stats,'sequences','count','output_seqs_paired')
167
-
168
- # TODO - Add this stats to full stats
169
- # @@full_stats.add_stats({'sequences' => {'paired' => {'count' => 1}}})
170
-
171
- if (mid.nil? || (mid.message=='no_MID') ) # without mid
172
- mid_id = 'no_MID'
173
- mid_message = ' No MID found'
174
- else
175
- mid_id = mid.tag_id
176
- mid_message=''
177
- if mid_id != mid_message
178
- mid_message = ' '+mid.message
179
- end
180
- end
181
-
182
- # fasta_file = get_paired_file(mid_id)
183
-
184
- n="#{seq.seq_name}_left"
185
- c="template=#{seq.seq_name} dir=R library=#{mid_id}"
186
- f=inserts[0].reverse.tr('actgACTG','tgacTGAC')
187
- q=[]
188
- if @use_qual
189
- q=qual_inserts[0].reverse
190
- end
191
-
192
- paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
193
-
194
-
195
- n="#{seq.seq_name}_right"
196
- c="template=#{seq.seq_name} dir=F library=#{mid_id}"
197
- f=inserts[1]
198
- q=[]
199
- if @use_qual
200
- q=qual_inserts[1]
201
- end
202
-
203
- paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
204
-
205
-
206
- elsif (inserts.count == 1) # sequence with one insert
207
-
208
- if (mid.nil? || (mid.message=='no_MID') ) # without mid
209
- mid_id = 'no_MID'
210
- mid_message = ' No MID found'
211
- else
212
- mid_id = mid.tag_id
213
- mid_message=''
214
- if mid_id != mid_message
215
- mid_message = ' '+mid.message
216
- end
217
- end
218
-
219
- # save fasta and qual in no MID file
220
- has_low_complexity = seq.get_actions(ActionLowComplexity)
221
-
222
- if has_low_complexity.empty?
223
- add_stat(stats,'sequences','count','output_seqs')
224
-
225
- # fasta_file = get_sequence_file(mid_id)
226
- # sff_file=get_sffinfo_file(mid_id)
227
- fasta_file=sequence_file(files,dir_name,file_name)
228
- sff_file=sffinfo_file(files,dir_name,file_name)
229
- else
230
- add_stat(stats,'sequences','count','output_seqs_low_complexity')
231
-
232
- # fasta_file = get_low_complexity_file(mid_id)
233
- # sff_file=get_low_sffinfo_file(mid_id)
234
- fasta_file=low_complexity_file(files,dir_name,file_name)
235
- sff_file=low_sffinfo_file(files,dir_name,file_name)
60
+ # execute plugins
61
+ @plugin_manager.execute_plugins(running_seqs)
62
+
63
+ # add output data
64
+ add_output_data(running_seqs)
65
+
66
+ return running_seqs
67
+ end
68
+
69
+ def receive_initial_config(obj)
70
+
71
+ # Reads the parameters
72
+ $WORKER_LOG.info "Params received"
73
+ # @params = Params.new(params_path)
74
+ @params = obj
75
+ @tuple_size=@params.get_param('tuple_size')
76
+
77
+ @use_qual=@params.get_param('use_qual')
78
+ @use_json=@params.get_param('use_json')
79
+ end
80
+
81
+ def starting_worker
82
+
83
+ # $WORKER_LOG.level = Logger::ERROR
84
+ $WORKER_LOG.level = Logger::WARN
85
+ $WORKER_LOG.info "Loading actions"
86
+
87
+ @action_manager = ActionManager.new
88
+
89
+ $WORKER_LOG.info "Loading plugins"
90
+ @plugin_list = @params.get_param('plugin_list') # puts in plugin_list the plugins's array
91
+ $WORKER_LOG.info "PLUGIN LIST:" + @plugin_list
92
+
93
+ @plugin_manager = PluginManager.new(@plugin_list,@params) # creates an instance from PluginManager. This must storage the plugins and load it
94
+
95
+ rescue Exception => e
96
+ puts (e.message+ e.backtrace.join("\n"))
97
+
98
+ end
99
+
100
+
101
+ def closing_worker
102
+
103
+ end
104
+
105
+
106
+ def add_output_data(obj)
107
+ obj.output_text=[]
108
+
109
+ if @tuple_size>1
110
+ obj.each_slice(@tuple_size) do |seqs|
111
+
112
+ write_seq_to_files_tuple(obj.output_files,seqs, obj.stats)
113
+
114
+ seqs.each do |seq|
115
+ obj.output_text << seq.to_text
236
116
  end
237
-
238
- q=[]
239
- if @use_qual
240
- q=qual_inserts[0]
241
- end
242
-
243
- n=seq.seq_name
244
- c=mid_message
245
- f=inserts[0]
246
-
247
- fasta_file << FastqFile.to_fastq(n,f,q,c)
248
-
249
- inserts_pos = seq.get_actions(ActionInsert)
250
-
251
- sff_file<< "#{n} #{inserts_pos[0].start_pos+1} #{inserts_pos[0].end_pos+1}"
252
-
253
117
  end
254
-
255
- end
256
-
257
-
258
- # ACCESS TO FILES
259
-
260
- def json_file(files)
261
- return get_file(files,File.join(OUTPUT_PATH,'results.json'))
262
- end
263
-
264
- def rejected_output_file(files)
265
- return get_file(files,File.join(OUTPUT_PATH,'rejected.txt'))
266
- end
267
-
268
-
269
- def sequence_file(files, dir_name, file_name)
270
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'sequences_'+file_name+'.fastq'))
271
- end
272
-
273
- def paired_file(files, dir_name, file_name)
274
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_'+file_name+'.fastq'))
118
+
119
+ else
120
+ obj.each do |seq|
121
+ write_seq_to_files_normal(obj.output_files,seq, obj.stats)
122
+ obj.output_text << seq.to_text
123
+ end
124
+ end
125
+
126
+ # @remove seqs since they are not needed anymore to write output files
127
+ obj.remove_all_seqs
128
+ end
129
+
130
+ def add_stat(stats,key,subkey,value,count=1)
131
+
132
+ stats[key]={} if !stats[key]
133
+ stats[key][subkey]={} if !stats[key][subkey]
134
+ stats[key][subkey][value]=0 if !stats[key][subkey][value]
135
+
136
+ stats[key][subkey][value]+=count
137
+ end
138
+
139
+ def write_seq_to_files_tuple(files,seqs, stats)
140
+
141
+
142
+ seq1=seqs[0]
143
+ seq2=seqs[1]
144
+
145
+ dir_name,file_name,priority=seq1.get_file_tag_path
146
+ dir_name2,file_name2,priority2=seq2.get_file_tag_path
147
+
148
+ # both paired sequences must go in same file, there are priorities
149
+ if (dir_name!=dir_name2) || (file_name!=file_name2)
150
+ if priority2>priority
151
+ dir_name=dir_name2
152
+ file_name=file_name2
153
+ end
275
154
  end
155
+
156
+ # get current inserts
157
+ inserts1 = seq1.get_inserts
158
+ inserts2 = seq2.get_inserts
276
159
 
277
- def low_complexity_file(files, dir_name, file_name)
278
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_'+file_name+'.fastq'))
160
+ # qualities are optional
161
+ if @use_qual
162
+ qual_inserts1 = seq1.get_qual_inserts
163
+ qual_inserts2 = seq2.get_qual_inserts
279
164
  end
280
-
281
- def sffinfo_file(files, dir_name, file_name)
282
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'sff_info_'+file_name+'.txt'))
165
+
166
+
167
+
168
+ # save json if necessary
169
+ if @use_json
170
+ json_file(files)<< seq1.to_json
171
+ json_file(files)<< seq2.to_json
283
172
  end
284
173
 
285
- def low_sffinfo_file(files, dir_name, file_name)
286
- return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_sff_info_'+file_name+'.txt'))
174
+ # find mids
175
+ mid1 = seq1.get_actions(ActionMid).first
176
+ mid2 = seq2.get_actions(ActionMid).first
177
+
178
+
179
+ if !inserts1.empty? && !inserts2.empty? # both have inserts
180
+ # save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
181
+ save_two_inserts_tuple(files,seq1,seq2, stats,inserts1,inserts2,qual_inserts1,qual_inserts2,mid1,dir_name,file_name)
182
+ else
183
+ save_rejected_empty_or_single(files,seq1, stats,inserts1,qual_inserts1,mid1,dir_name,file_name)
184
+ save_rejected_empty_or_single(files,seq2, stats,inserts2,qual_inserts2,mid2,dir_name,file_name)
185
+ end
186
+
187
+ end
188
+
189
+ def save_two_inserts_tuple(files,seq1,seq2, stats,inserts1,inserts2,qual_inserts1,qual_inserts2,mid,dir_name,file_name)
190
+
191
+ add_stat(stats,'sequences','count','output_seqs_paired')
192
+ add_stat(stats,'sequences','count','output_seqs_paired')
193
+
194
+ mid_id,mid_message=get_mid_message(mid)
195
+
196
+ # save left read
197
+ n="#{seq1.seq_name}"
198
+ c=seq1.get_comment_line # "template=#{seq1.seq_name} dir=R library=#{mid_id}"
199
+ f=inserts1[0]#.reverse.tr('actgACTG','tgacTGAC')
200
+ q=[]
201
+ if @use_qual
202
+ q=qual_inserts1[0] #.reverse
287
203
  end
288
-
289
- def get_file(files,fn)
290
- res=files[fn]
291
-
292
- if !res
293
- files[fn]=[]
294
- res=files[fn]
204
+
205
+ paired_file_ilu1(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
206
+
207
+ # save right read
208
+ n="#{seq2.seq_name}"
209
+ c=seq2.get_comment_line # "template=#{seq2.seq_name} dir=F library=#{mid_id}"
210
+ f=inserts2[0]
211
+ q=[]
212
+ if @use_qual
213
+ q=qual_inserts2[0]
214
+ end
215
+
216
+ paired_file_ilu2(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
217
+
218
+ end
219
+
220
+
221
+ def save_rejected_empty_or_single(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
222
+ if (seq.seq_rejected) # save to rejected sequences
223
+ save_rejected_seq(files,seq, stats)
224
+ elsif (inserts.empty?) #sequence with no inserts
225
+ save_empty_insert(files,seq, stats)
226
+ elsif (inserts.count == 1) # sequence with one insert
227
+ save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
228
+ end
229
+ end
230
+
231
+
232
+ # SAVE NORMAL ===============================
233
+ def save_rejected_seq(files,seq, stats)
234
+ # message = seq.seq_rejected_by_message
235
+ message= seq.get_comment_line
236
+ rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
237
+
238
+ add_stat(stats,'sequences','rejected',seq.seq_rejected_by_message)
239
+ add_stat(stats,'sequences','count','rejected')
240
+ end
241
+
242
+ def save_empty_insert(files,seq, stats)
243
+ seq.seq_rejected=true
244
+ seq.seq_rejected_by_message='short insert'
245
+
246
+ message = 'No valid inserts found'
247
+
248
+ rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message)
249
+
250
+ add_stat(stats,'sequences','rejected',message)
251
+ add_stat(stats,'sequences','count','rejected')
252
+
253
+ end
254
+
255
+ def get_mid_message(mid)
256
+ if (mid.nil? || (mid.message=='no_MID') ) # without mid
257
+ mid_id = 'no_MID'
258
+ mid_message = ' No MID found'
259
+ else
260
+ mid_id = mid.tag_id
261
+ mid_message=''
262
+ if mid_id != mid_message
263
+ mid_message = ' '+mid.message
295
264
  end
265
+ end
266
+ return mid_id,mid_message
267
+ end
268
+
269
+ def save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
270
+ add_stat(stats,'sequences','count','output_seqs_paired')
271
+
272
+ mid_id,mid_message=get_mid_message(mid)
273
+
274
+ # save left read
275
+ n="#{seq.seq_name}_left"
276
+ c="template=#{seq.seq_name} dir=R library=#{mid_id} #{seq.get_comment_line}"
277
+ f=inserts[0].reverse.tr('actgACTG','tgacTGAC')
278
+ q=[]
279
+ if @use_qual
280
+ q=qual_inserts[0].reverse
281
+ end
282
+
283
+ paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
284
+
285
+ # save right read
286
+ n="#{seq.seq_name}_right"
287
+ c="template=#{seq.seq_name} dir=F library=#{mid_id} #{seq.get_comment_line}"
288
+ f=inserts[1]
289
+ q=[]
290
+ if @use_qual
291
+ q=qual_inserts[1]
292
+ end
293
+
294
+ paired_file(files,dir_name,file_name)<<FastqFile.to_fastq(n,f,q,c)
295
+
296
+ end
297
+
298
+ def save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
299
+ mid_id,mid_message=get_mid_message(mid)
300
+
301
+ # save fasta and qual in no MID file
302
+ has_low_complexity = seq.get_actions(ActionLowComplexity)
303
+
304
+ if has_low_complexity.empty?
305
+ add_stat(stats,'sequences','count','output_seqs')
306
+
307
+ fasta_file=sequence_file(files,dir_name,file_name)
308
+ sff_file=sffinfo_file(files,dir_name,file_name)
309
+ else
310
+ add_stat(stats,'sequences','count','output_seqs_low_complexity')
311
+
312
+ fasta_file=low_complexity_file(files,dir_name,file_name)
313
+ sff_file=low_sffinfo_file(files,dir_name,file_name)
314
+ end
315
+
316
+ q=[]
317
+ if @use_qual
318
+ q=qual_inserts[0]
319
+ end
320
+
321
+ n=seq.seq_name
322
+ c=mid_message
323
+
324
+ seq_comments=seq.get_comment_line
325
+ if !seq_comments.strip.empty?
326
+ c=seq_comments + c
327
+ end
328
+
329
+ f=inserts[0]
330
+
331
+ fasta_file << FastqFile.to_fastq(n,f,q,c)
332
+
333
+ inserts_pos = seq.get_actions(ActionInsert)
334
+
335
+ sff_file<< "#{n} #{inserts_pos[0].start_pos+1} #{inserts_pos[0].end_pos+1}"
336
+
337
+
338
+ end
339
+
340
+
341
+ def write_seq_to_files_normal(files,seq, stats)
342
+
343
+ # puts stats.to_json
344
+
345
+ dir_name,file_name,priority=seq.get_file_tag_path
346
+ # puts File.join(dir_name,'sequences_'+file_name)
347
+
348
+ # get current inserts
349
+ inserts = seq.get_inserts
350
+
351
+ # qualities are optional
352
+ if @use_qual
353
+ qual_inserts = seq.get_qual_inserts
354
+ end
355
+
356
+ # save json if necessary
357
+ if @use_json
358
+ json_file(files)<< seq.to_json
359
+ end
360
+
361
+ # find mids
362
+ mid = seq.get_actions(ActionMid).first
363
+
364
+
365
+ if (seq.seq_rejected) # save to rejected sequences
366
+ save_rejected_seq(files,seq, stats)
296
367
 
297
- return res
368
+ elsif (inserts.empty?) #sequence with no inserts
369
+ save_empty_insert(files,seq, stats)
370
+
371
+ elsif (inserts.count == 2) # sequence with two inserts = PAIRED SEQUENCES
372
+ save_two_inserts(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
373
+
374
+ elsif (inserts.count == 1) # sequence with one insert
375
+ save_one_insert(files,seq, stats,inserts,qual_inserts,mid,dir_name,file_name)
376
+ end
377
+
378
+ end
379
+
380
+
381
+
382
+
383
+
384
+ # ACCESS TO FILES
385
+
386
+ def json_file(files)
387
+ return get_file(files,File.join(OUTPUT_PATH,'results.json'))
388
+ end
389
+
390
+ def rejected_output_file(files)
391
+ return get_file(files,File.join(OUTPUT_PATH,'rejected.txt'))
392
+ end
393
+
394
+
395
+ def sequence_file(files, dir_name, file_name)
396
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'sequences_'+file_name+'.fastq'))
397
+ end
398
+
399
+ def paired_file(files, dir_name, file_name)
400
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_'+file_name+'.fastq'))
401
+ end
402
+
403
+ def paired_file_ilu1(files, dir_name, file_name)
404
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_1_'+file_name+'.fastq'))
405
+ end
406
+
407
+ def paired_file_ilu2(files, dir_name, file_name)
408
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'paired_2_'+file_name+'.fastq'))
409
+ end
410
+
411
+
412
+ def low_complexity_file(files, dir_name, file_name)
413
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_'+file_name+'.fastq'))
414
+ end
415
+
416
+ def sffinfo_file(files, dir_name, file_name)
417
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'sff_info_'+file_name+'.txt'))
418
+ end
419
+
420
+ def low_sffinfo_file(files, dir_name, file_name)
421
+ return get_file(files,File.join(OUTPUT_PATH,dir_name,'low_complexity_sff_info_'+file_name+'.txt'))
422
+ end
423
+
424
+ def get_file(files,fn)
425
+ res=files[fn]
426
+
427
+ if !res
428
+ files[fn]=[]
429
+ res=files[fn]
298
430
  end
299
-
431
+
432
+ return res
433
+ end
434
+
300
435
  end