seqtrimnext 2.0.46 → 2.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ === 2.0.48 2012-05-16
2
+
3
+ Select the biggest sequence in a cluster
4
+
5
+ === 2.0.47 2012-05-14
6
+
7
+ Added an option for Illumina qualities based on phred+64 scores
8
+
1
9
  === 2.0.46 2012-04-13
2
10
 
3
11
  Checkpointing activated. Jobs can be restarted where stopped.
data/bin/seqtrimnext CHANGED
@@ -206,6 +206,11 @@ optparse = OptionParser.new do |opts|
206
206
  opts.on( '-c', '--check_databases', 'Check Blast databases and reformat if necessary' ) do
207
207
  options[:check_db] = true
208
208
  end
209
+
210
+ options[:use_checkpoint] = false
211
+ opts.on( '-C', '--use_checkpoint', 'Restore at checkpoint if scbi_mapreduce_checkpoint file is available' ) do
212
+ options[:use_checkpoint] = true
213
+ end
209
214
 
210
215
  # options[:skip_initial_stats] = false
211
216
  # opts.on( '-k', '--skip_initial_stats', 'Skip initial stats' ) do
@@ -227,6 +232,16 @@ optparse = OptionParser.new do |opts|
227
232
  opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
228
233
  options[:fastq] = file
229
234
  end
235
+
236
+ options[:format] = nil
237
+ opts.on( '-F', '--fastq_quality_format FORMAT', 'Fastq input quality format use sanger or illumina18 for phred+33 based scores. Use illumina15 for phred+64 based scores (default is sanger) file. Use - for <STDIN>' ) do |value|
238
+ options[:format] = value
239
+ if !['sanger','illumina15', 'illumina18'].include?(value)
240
+ STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
241
+ exit
242
+ end
243
+ end
244
+
230
245
 
231
246
  options[:fasta] = nil
232
247
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
@@ -123,11 +123,16 @@ class Params
123
123
  #puts line,line[0]
124
124
  # in Ruby 1.9, line[0] returns the character itself, not its integer character code
125
125
  #if (line[0]!=62) && (line[0]!=48)
126
- if (line[0]!='>'[0]) && (line[0]!='0'[0])
126
+ # if (line[0]!='>'[0]) && (line[0]!='0'[0])
127
+
128
+ # line doesn't finish in *
129
+ if (line[0]!='>'[0]) && (!(line =~ /\*$/))
130
+
127
131
  #puts line
128
132
  # puts line,line[0]
129
133
  if line =~ />([^\.]+)\.\.\.\s/
130
134
  #puts 'ok'
135
+ # puts $1
131
136
  @clusters[$1]=1
132
137
  end
133
138
  end
@@ -160,6 +160,16 @@ class Seqtrim
160
160
  only_workers=options[:only_workers]
161
161
  chunk_size = options[:chunk_size]
162
162
  use_json = options[:json]
163
+
164
+ # check for checkpoint
165
+
166
+ if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
167
+ if !options[:use_checkpoint]
168
+ STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
169
+ exit
170
+ end
171
+ end
172
+
163
173
 
164
174
 
165
175
  # it is the server part
@@ -172,13 +182,28 @@ class Seqtrim
172
182
 
173
183
  # open sequence reader and expand input files paths
174
184
  if options[:fastq]
185
+
175
186
  if options[:fastq]=='-'
176
187
  seqs_path = STDIN
177
188
  else
178
189
  seqs_path = File.expand_path(options[:fastq])
179
190
  end
191
+
180
192
  cd_hit_input_file = seqs_path
181
- sequence_reader = FastqFile.new(seqs_path,'r',:sanger, true)
193
+
194
+ # choose fastq quality format
195
+ format=:sanger
196
+
197
+ case options[:format]
198
+ when 'sanger'
199
+ format = :sanger
200
+ when 'illumina15'
201
+ format = :ilumina
202
+ when 'illumina18'
203
+ format = :sanger
204
+ end
205
+
206
+ sequence_reader = FastqFile.new(seqs_path,'r',format, true)
182
207
  # cd_hit_input_file = 'cd-hit-input.fasta'
183
208
  cd_hit_input_file = seqs_path
184
209
  # $LOG.info "Converting input file for cd-hit-454"
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.46'
33
+ VERSION = '2.0.48'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.46
5
+ version: 2.0.48
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-04-13 00:00:00 Z
13
+ date: 2012-05-16 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray