seqtrimnext 2.0.51 → 2.0.52

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +3 -3
  3. data/README.rdoc +18 -3
  4. data/Rakefile +2 -1
  5. data/bin/parse_params.rb +5 -1
  6. data/bin/seqtrimnext +53 -21
  7. data/lib/seqtrimnext/actions/{action_classify.rb → action_user_contaminant.rb} +2 -2
  8. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +64 -20
  9. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +375 -240
  10. data/lib/seqtrimnext/classes/extract_stats.rb +26 -23
  11. data/lib/seqtrimnext/classes/params.rb +109 -123
  12. data/lib/seqtrimnext/classes/plugin_manager.rb +2 -4
  13. data/lib/seqtrimnext/classes/seqtrim.rb +24 -29
  14. data/lib/seqtrimnext/classes/sequence.rb +2 -2
  15. data/lib/seqtrimnext/classes/sequence_group.rb +21 -1
  16. data/lib/seqtrimnext/classes/sequence_with_action.rb +25 -13
  17. data/lib/seqtrimnext/plugins/plugin.rb +42 -12
  18. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +1 -8
  19. data/lib/seqtrimnext/plugins/plugin_adapters.rb +0 -9
  20. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +0 -12
  21. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +5 -8
  22. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +1 -10
  23. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +1 -11
  24. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +1 -7
  25. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +1 -8
  26. data/lib/seqtrimnext/plugins/plugin_key.rb +1 -9
  27. data/lib/seqtrimnext/plugins/plugin_linker.rb +0 -9
  28. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +6 -21
  29. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +3 -13
  30. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +126 -330
  31. data/lib/seqtrimnext/plugins/plugin_mids.rb +0 -11
  32. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +1 -10
  33. data/lib/seqtrimnext/plugins/plugin_user_contaminants.rb +40 -32
  34. data/lib/seqtrimnext/plugins/plugin_vectors.rb +0 -9
  35. data/lib/seqtrimnext/templates/amplicons.txt +1 -8
  36. data/lib/seqtrimnext/templates/genomics_454.txt +12 -8
  37. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +19 -1
  38. data/lib/seqtrimnext/templates/genomics_short_reads.txt +26 -1
  39. data/lib/seqtrimnext/templates/genomics_short_reads_2.txt +24 -1
  40. data/lib/seqtrimnext/templates/only_quality.txt +24 -0
  41. data/lib/seqtrimnext/templates/sanger.txt +25 -0
  42. data/lib/seqtrimnext/templates/transcriptomics_454.txt +18 -1
  43. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +22 -1
  44. data/lib/seqtrimnext/templates/transcriptomics_short_reads.txt +23 -1
  45. data/lib/seqtrimnext.rb +1 -1
  46. metadata +20 -7
  47. data/lib/seqtrimnext/plugins/plugin_adapters_old.rb +0 -165
  48. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +0 -245
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 2.0.52 2012-06-26
2
+
3
+ * Added new plugin for user contaminants.
4
+ * Sequences contaminated with user contaminants are stored in separate files.
5
+ * Processing of both illumina fastq paired-end files in the same execution.
6
+ * Template reorganization.
7
+
1
8
  === 2.0.51 2012-06-20
2
9
 
3
10
  Added cont_viruses database
data/Manifest.txt CHANGED
@@ -24,7 +24,7 @@ History.txt
24
24
  lib/seqtrimnext/actions/action_ab_adapter.rb
25
25
  lib/seqtrimnext/actions/action_ab_far_adapter.rb
26
26
  lib/seqtrimnext/actions/action_ab_left_adapter.rb
27
- lib/seqtrimnext/actions/action_classify.rb
27
+ lib/seqtrimnext/actions/action_user_contaminant.rb
28
28
  lib/seqtrimnext/actions/action_empty_insert.rb
29
29
  lib/seqtrimnext/actions/action_ignore_repeated.rb
30
30
  lib/seqtrimnext/actions/action_indetermination.rb
@@ -75,7 +75,6 @@ lib/seqtrimnext/classes/sequence_with_action.rb
75
75
  lib/seqtrimnext/plugins/plugin.rb
76
76
  lib/seqtrimnext/plugins/plugin_ab_adapters.rb
77
77
  lib/seqtrimnext/plugins/plugin_adapters.rb
78
- lib/seqtrimnext/plugins/plugin_adapters_old.rb
79
78
  lib/seqtrimnext/plugins/plugin_amplicons.rb
80
79
  lib/seqtrimnext/plugins/plugin_contaminants.rb
81
80
  lib/seqtrimnext/plugins/plugin_user_contaminants.rb
@@ -89,10 +88,11 @@ lib/seqtrimnext/plugins/plugin_low_complexity.rb
89
88
  lib/seqtrimnext/plugins/plugin_low_high_size.rb
90
89
  lib/seqtrimnext/plugins/plugin_low_quality.rb
91
90
  lib/seqtrimnext/plugins/plugin_mids.rb
92
- lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
93
91
  lib/seqtrimnext/plugins/plugin_short_insert.rb
94
92
  lib/seqtrimnext/plugins/plugin_vectors.rb
95
93
  lib/seqtrimnext/templates/amplicons.txt
94
+ lib/seqtrimnext/templates/sanger.txt
95
+ lib/seqtrimnext/templates/only_quality.txt
96
96
  lib/seqtrimnext/templates/genomics_454.txt
97
97
  lib/seqtrimnext/templates/genomics_454_with_paired.txt
98
98
  lib/seqtrimnext/templates/genomics_short_reads.txt
data/README.rdoc CHANGED
@@ -48,7 +48,7 @@ To install core databases (it should be done at installation time):
48
48
 
49
49
  $> seqtrimnext -i core
50
50
 
51
- Databases will be installed nearby SeqtrimNEXT by default, but you can override this location by setting the environment variable +BASTDB+. Eg.:
51
+ Databases will be installed nearby SeqtrimNEXT by default, but you can override this location by setting the environment variable +BLASTDB+. Eg.:
52
52
 
53
53
  If you with your database installed at /var:
54
54
 
@@ -56,6 +56,10 @@ If you with your database installed at /var:
56
56
 
57
57
  Be sure that this environment variable is always loaded before SeqtrimNEXT execution (Eg.: add it to /etc/profile.local).
58
58
 
59
+ There are additional databases. To list them:
60
+
61
+ $> seqtrimnext -i LIST
62
+
59
63
  To perform an analisys using a predefined template with a FASTQ file format using 4 cpus:
60
64
 
61
65
  $> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w 4
@@ -64,6 +68,13 @@ To perform an analisys using a predefined template with a FASTQ file format:
64
68
 
65
69
  $> seqtrimnext -t genomics_454.txt -f input_file_in_FASTA -q input_file_in_QUAL
66
70
 
71
+ To clean illumina fastq files, with paired-ends and qualities encoded in illumina 1.5 format, using 4 cpus and disabling verbose output:
72
+
73
+ $> seqtrimnext -t genomics_short_reads.txt -F illumina15 -Q p1.fastq,p2.fastq -w 4 -K
74
+
75
+ To clean illumina fastq files, with paired-ends and qualities encoded in standard phred format, using 4 cpus and disabling verbose output:
76
+
77
+ $> seqtrimnext -t genomics_short_reads.txt -Q p1.fastq,p2.fastq -w 4 -K
67
78
 
68
79
  To get additional help and list available templates and databases:
69
80
 
@@ -186,13 +197,17 @@ SeqtrimNEXT needs some core databases to work. To install them:
186
197
 
187
198
  seqtrimnext -i core
188
199
 
189
- You can change default database location by setting the environment variable +BASTDB+. Refer to SYNOPSIS for an example.
200
+ You can change default database location by setting the environment variable +BLASTDB+. Refer to SYNOPSIS for an example.
201
+
202
+ There are additional databases that can be listed with:
203
+
204
+ seqtrimnext -i LIST
190
205
 
191
206
  === Database modifications
192
207
 
193
208
  Included databases will be useful for a lot of people, but if you prefer, you can modify them, or add more elements to be searched against your sequences.
194
209
 
195
- You only need to drop new fasta files to each respective directory:
210
+ You only need to drop new fasta files to each respective directory, or even create new directories with new fasta files inside. Each directory with fasta files will be used as a database:
196
211
 
197
212
  DB/vectors to add more vectors
198
213
  DB/contaminants to add more contaminants
data/Rakefile CHANGED
@@ -16,7 +16,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
16
16
  self.rubyforge_name = self.name # TODO this is default value
17
17
  # self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
18
18
 
19
- self.extra_deps = []
19
+ self.extra_deps = []
20
20
  self.extra_deps << ['narray','>=0']
21
21
  self.extra_deps << ['gnuplot','>=0']
22
22
  self.extra_deps << ['term-ansicolor','>=1.0.5']
@@ -27,6 +27,7 @@ $hoe = Hoe.spec 'seqtrimnext' do
27
27
  self.extra_deps << ['scbi_fastq','>=0.0.16']
28
28
  self.extra_deps << ['scbi_plot','>=0.0.6']
29
29
  self.extra_deps << ['scbi_math','>=0.0.1']
30
+ self.extra_deps << ['scbi_headers','>=0.0.2']
30
31
 
31
32
  end
32
33
 
data/bin/parse_params.rb CHANGED
@@ -26,6 +26,7 @@ params={}
26
26
  params['vector_db_field']='vectors_db'
27
27
  params['primers_db_field']='primers_db'
28
28
  params['contaminants_db_field']='contaminants_db'
29
+ params['user_contaminants_db_field']='user_contaminants_db'
29
30
  params['species_field']='genus'
30
31
  params['min_insert_size_field']='min_insert_size_trimmed'
31
32
  params['min_paired_insert_size_field']='min_insert_size_paired'
@@ -53,6 +54,7 @@ end
53
54
  sq_params=File.open(params_file,'r')
54
55
 
55
56
  data=get_json_data(input_file)
57
+
56
58
  # puts data.keys
57
59
  # puts data['vector_db_field']
58
60
 
@@ -69,10 +71,12 @@ data=get_json_data(input_file)
69
71
 
70
72
  sq_params=File.open(params_file,'a+')
71
73
 
74
+ sq_params.puts ""
75
+
72
76
  data.each do |k,v|
73
77
 
74
78
  sq_name=params[k]
75
- # puts k,sq_name
79
+ # puts k,sq_name
76
80
 
77
81
  if sq_name && v && !v.empty?
78
82
  sq_params.puts "#{sq_name}=#{v}"
data/bin/seqtrimnext CHANGED
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
2
4
  # SeqTrimNext: Next generation sequencing preprocessor
3
5
  # Copyright (C) <2011>
4
6
  # Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
@@ -57,9 +59,35 @@
57
59
  # $: << File.expand_path(ROOT_PATH)
58
60
 
59
61
  $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
60
- $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib/')
62
+ # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib/')
61
63
 
62
64
  require 'seqtrimnext'
65
+ require 'scbi_headers'
66
+
67
+
68
+ def put_header
69
+ header = ScbiHeader.new('SeqTrimNEXT',Seqtrimnext::SEQTRIM_VERSION)
70
+
71
+ header.description="SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of scbi_mapreduce gem to be able to run in parallel and distributed environments. It is specially suited for Roche 454 (normal and paired-end) & Ilumina datasets, although it could be easyly adapted to any other situation."
72
+
73
+ header.copyright='2011'
74
+
75
+ header.authors<< "Darío Guerrero"
76
+ header.authors<< "Almudena Bocinos"
77
+ header.authors<< "Rocío Bautista"
78
+ header.authors<< "Noé Fernández"
79
+ header.authors<< "Juan Falgueras"
80
+ header.authors<< "M. Gonzalo Claros"
81
+
82
+ # header.articles<< "Article one: with one description line"
83
+ # header.articles<< "Article two: with one description line"
84
+
85
+ # To output the header
86
+ puts header
87
+
88
+ end
89
+
90
+ put_header
63
91
 
64
92
  ############ PATHS #######################
65
93
  $SEQTRIM_PATH = ROOT_PATH
@@ -192,7 +220,7 @@ optparse = OptionParser.new do |opts|
192
220
  end
193
221
 
194
222
  end
195
-
223
+
196
224
  end
197
225
 
198
226
 
@@ -211,12 +239,12 @@ optparse = OptionParser.new do |opts|
211
239
  opts.on( '-C', '--use_checkpoint', 'Restore at checkpoint if scbi_mapreduce_checkpoint file is available' ) do
212
240
  options[:use_checkpoint] = true
213
241
  end
214
-
242
+
215
243
  # options[:skip_initial_stats] = false
216
244
  # opts.on( '-k', '--skip_initial_stats', 'Skip initial stats' ) do
217
245
  # options[:skip_initial_stats] = true
218
246
  # end
219
-
247
+
220
248
 
221
249
  options[:install_db] = nil
222
250
  opts.on( '-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
@@ -229,10 +257,12 @@ optparse = OptionParser.new do |opts|
229
257
  end
230
258
 
231
259
  options[:fastq] = nil
232
- opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
260
+ opts.on( '-Q', '--fastq FILE1,FILE2',Array, 'Fastq input file. Use - for <STDIN>' ) do |file|
233
261
  options[:fastq] = file
262
+ puts "FILES:",file,file.class
263
+
234
264
  end
235
-
265
+
236
266
  options[:format] = nil
237
267
  opts.on( '-F', '--fastq_quality_format FORMAT', 'Fastq input quality format use sanger or illumina18 for phred+33 based scores. Use illumina15 for phred+64 based scores (default is sanger) file. Use - for <STDIN>' ) do |value|
238
268
  options[:format] = value
@@ -241,7 +271,7 @@ optparse = OptionParser.new do |opts|
241
271
  exit
242
272
  end
243
273
  end
244
-
274
+
245
275
 
246
276
  options[:fasta] = nil
247
277
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
@@ -255,7 +285,7 @@ optparse = OptionParser.new do |opts|
255
285
 
256
286
  options[:list_db] = nil
257
287
  options[:list_db_name] = 'ALL'
258
-
288
+
259
289
  opts.on( '-L', '--list_db [DB_NAME]', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases' ) do |value|
260
290
  options[:list_db] = true
261
291
  options[:list_db_name] = value if value
@@ -281,12 +311,12 @@ optparse = OptionParser.new do |opts|
281
311
  opts.on( '-j', '--json', 'Save results in json file' ) do
282
312
  options[:json] = true
283
313
  end
284
-
314
+
285
315
  options[:skip_output] = false
286
316
  opts.on( '-K', '--no-verbose', 'Change to no verbose mode. Every sequence will not be written to output log' ) do
287
317
  options[:skip_output] = true
288
318
  end
289
-
319
+
290
320
  options[:skip_report] = false
291
321
  opts.on( '-R', '--no-report', 'Do not generate final PDF report (gem scbi_seqtrimnext_report required if you want to generate PDF report).' ) do
292
322
  options[:skip_report] = true
@@ -335,7 +365,7 @@ $LOG.info("Using options: "+ options.to_json)
335
365
  if options[:install_db] then
336
366
  #install databases
337
367
  InstallDatabase.new(options[:install_db],$DB_PATH)
338
-
368
+
339
369
  # reformat databases
340
370
  MakeBlastDb.new($DB_PATH)
341
371
  exit
@@ -376,13 +406,17 @@ end
376
406
  $LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
377
407
  $LOG.info "Using params file: #{options[:template]}"
378
408
 
379
- # fastq file
380
- if (!options[:fastq].nil? && options[:fastq]!='-' && !File.exists?(options[:fastq]))
381
- $LOG.error "Input file: #{options[:fasta]} doesn't exists"
382
- exit
383
- end
384
-
409
+ # check file existence
385
410
 
411
+ if options[:fastq]
412
+ options[:fastq].each do |fastq_file|
413
+ # fastq file
414
+ if (!fastq_file.nil? && fastq_file!='-' && !File.exists?(File.expand_path(fastq_file)))
415
+ $LOG.error "Input file: #{fastq_file} doesn't exists"
416
+ exit
417
+ end
418
+ end
419
+ end
386
420
 
387
421
  # fasta file
388
422
  if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
@@ -398,8 +432,6 @@ end
398
432
 
399
433
  s = Seqtrim.new(options)
400
434
 
401
-
402
-
403
435
  #generate report
404
436
 
405
437
  if !options[:skip_report] && system("which generate_report.rb > /dev/null ")
@@ -408,10 +440,10 @@ if !options[:skip_report] && system("which generate_report.rb > /dev/null ")
408
440
  `#{cmd}`
409
441
  else
410
442
  skip_text='.'
411
-
443
+
412
444
  if options[:skip_report]
413
445
  skip_text=' and remove the -R option from the command line.'
414
446
  end
415
-
447
+
416
448
  $LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
417
449
  end
@@ -7,10 +7,10 @@ require "seqtrim_action"
7
7
  # Inherit: Plugin
8
8
  ########################################################
9
9
 
10
- class ActionClassify < SeqtrimAction
10
+ class ActionUserContaminant < SeqtrimAction
11
11
 
12
12
  def initialize(start_pos,end_pos)
13
- super(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
14
  @cut =false
15
15
  end
16
16
 
@@ -13,7 +13,7 @@ STATS_PATH=File.join(OUTPUT_PATH,'stats.json')
13
13
 
14
14
  class SeqtrimWorkManager < ScbiMapreduce::WorkManager
15
15
 
16
- def self.init_work_manager(sequence_reader, params, chunk_size = 100, use_json=false, skip_output=false)
16
+ def self.init_work_manager(sequence_readers, params, chunk_size = 100, use_json=false, skip_output=false)
17
17
  @@full_stats={}
18
18
  @@params= params
19
19
  @@exit = false
@@ -22,7 +22,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
22
22
  @@ongoing_stats[:sequence_count] = 0
23
23
  @@ongoing_stats[:smallest_sequence_size] = 900000000000000
24
24
  @@ongoing_stats[:biggest_sequence_size] = 0
25
-
25
+
26
26
  @@skip_output=skip_output
27
27
 
28
28
  @@chunk_size = chunk_size
@@ -36,17 +36,20 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
36
36
  end
37
37
 
38
38
  #open input file
39
- @@fqr=sequence_reader
39
+ @@sequence_readers=sequence_readers
40
40
 
41
41
  # @@use_qual = @@fqr.with_qual?
42
42
  # @@use_json = use_json
43
43
 
44
- @@params.set_param('use_qual',@@fqr.with_qual?)
44
+ @@params.set_param('use_qual',@@sequence_readers.first.with_qual?)
45
45
  @@params.set_param('use_json',use_json)
46
+ @@params.set_param('tuple_size',@@sequence_readers.count)
46
47
 
47
48
  @@use_json=use_json
48
49
 
49
- @@fqr.rewind
50
+ @@sequence_readers.each do |sequence_reader|
51
+ sequence_reader.rewind
52
+ end
50
53
 
51
54
  # open output files
52
55
 
@@ -77,6 +80,8 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
77
80
 
78
81
  @@low_sffinfo_files={}
79
82
 
83
+ @@tuple_id=0
84
+
80
85
  end
81
86
 
82
87
  def self.end_work_manager
@@ -94,13 +99,12 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
94
99
  f.puts JSON.pretty_generate(@@ongoing_stats)
95
100
  end
96
101
  end
97
-
98
102
 
99
103
  # load stats
100
104
  r=File.read(STATS_PATH)
101
105
  stats=JSON::parse(r)
102
106
 
103
-
107
+
104
108
 
105
109
  # make graphs
106
110
  gs=GraphStats.new(stats)
@@ -198,7 +202,7 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
198
202
  # puts "Loaded Stats"
199
203
  # puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)
200
204
 
201
- # TODO - remove sequences from rejected file that were added by cloned
205
+ # TODO - remove sequences from rejected file that were added by cloned
202
206
 
203
207
  super
204
208
  # return checkpoint
@@ -218,20 +222,16 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
218
222
  warn "Deprecated: trash_checkpointed_work was deprecated, it is automatic now"
219
223
  end
220
224
 
221
- def next_work
222
-
223
- if @@exit
224
- return nil
225
- end
226
-
225
+ def get_next_seq_from_file(file)
226
+ # find a valid and no repeated sequence in file
227
227
  begin
228
228
 
229
- n,f,q,c = @@fqr.next_seq
229
+ n,f,q,c = file.next_seq
230
230
 
231
231
  if !n.nil? && @@params.repeated_seq?(n)
232
232
  @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
233
233
  @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
234
-
234
+
235
235
  get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
236
236
 
237
237
  end
@@ -240,17 +240,61 @@ class SeqtrimWorkManager < ScbiMapreduce::WorkManager
240
240
  @@ongoing_stats[:sequence_count] += 1
241
241
  @@ongoing_stats[:smallest_sequence_size] = [f.size, @@ongoing_stats[:smallest_sequence_size]].min
242
242
  @@ongoing_stats[:biggest_sequence_size] = [f.size, @@ongoing_stats[:smallest_sequence_size]].max
243
-
243
+
244
244
  @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
245
245
  end
246
+
246
247
  end while (!n.nil? && @@params.repeated_seq?(n))
247
248
 
248
- if !n.nil?
249
- return SequenceWithAction.new(n,f.upcase,q,c)
250
- else
249
+ return n,f,q,c
250
+
251
+ end
252
+
253
+ def next_work
254
+
255
+ if @@exit
251
256
  return nil
252
257
  end
253
258
 
259
+ tuple=[]
260
+ order_in_tuple=0
261
+
262
+ @@tuple_id += 1
263
+ tuple_size=@@sequence_readers.count
264
+
265
+ @@sequence_readers.each do |sequence_reader|
266
+ n,f,q,c = get_next_seq_from_file(sequence_reader)
267
+
268
+ if !n.nil?
269
+ seq=SequenceWithAction.new(n,f.upcase,q,c)
270
+ seq.tuple_id=@@tuple_id
271
+ seq.order_in_tuple=order_in_tuple
272
+ seq.tuple_size=tuple_size
273
+ tuple << seq
274
+ order_in_tuple+=1
275
+ end
276
+
277
+ end
278
+
279
+ if tuple_size>1
280
+ # check duplicated names
281
+ names = tuple.map{|s| s.seq_name}
282
+
283
+ if names.uniq.count!=tuple_size
284
+ # puts "NAMES EQUAL IN TUPLE"
285
+ tuple.each_with_index do |seq,i|
286
+ # puts seq.class # seq_name
287
+ seq.seq_name = "#{seq.seq_name}/#{i+1}"
288
+ end
289
+ end
290
+ end
291
+
292
+ # tuple is complete
293
+ if tuple.count==tuple_size
294
+ return tuple
295
+ else
296
+ return nil
297
+ end
254
298
 
255
299
  end
256
300