seqtrimnext 2.0.46 → 2.0.48

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ === 2.0.48 2012-05-16
2
+
3
+ Select biggest sequence on cluster
4
+
5
+ === 2.0.47 2012-05-14
6
+
7
+ Added option for illumina qualities based on phred+64 scores
8
+
1
9
  === 2.0.46 2012-04-13
2
10
 
3
11
  Checkpointing activated. Jobs can be restarted where stopped.
data/bin/seqtrimnext CHANGED
@@ -206,6 +206,11 @@ optparse = OptionParser.new do |opts|
206
206
  opts.on( '-c', '--check_databases', 'Check Blast databases and reformat if necessary' ) do
207
207
  options[:check_db] = true
208
208
  end
209
+
210
+ options[:use_checkpoint] = false
211
+ opts.on( '-C', '--use_checkpoint', 'Restore at checkpoint if scbi_mapreduce_checkpoint file is available' ) do
212
+ options[:use_checkpoint] = true
213
+ end
209
214
 
210
215
  # options[:skip_initial_stats] = false
211
216
  # opts.on( '-k', '--skip_initial_stats', 'Skip initial stats' ) do
@@ -227,6 +232,16 @@ optparse = OptionParser.new do |opts|
227
232
  opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
228
233
  options[:fastq] = file
229
234
  end
235
+
236
+ options[:format] = nil
237
+ opts.on( '-F', '--fastq_quality_format FORMAT', 'Fastq input quality format use sanger or illumina18 for phred+33 based scores. Use illumina15 for phred+64 based scores (default is sanger) file. Use - for <STDIN>' ) do |value|
238
+ options[:format] = value
239
+ if !['sanger','illumina15', 'illumina18'].include?(value)
240
+ STDERR.puts "ERROR: Invalid FASTQ format parameter #{value}"
241
+ exit
242
+ end
243
+ end
244
+
230
245
 
231
246
  options[:fasta] = nil
232
247
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
@@ -123,11 +123,16 @@ class Params
123
123
  #puts line,line[0]
124
124
  # en ruby19 line[0] da el caracter, no el chr
125
125
  #if (line[0]!=62) && (line[0]!=48)
126
- if (line[0]!='>'[0]) && (line[0]!='0'[0])
126
+ # if (line[0]!='>'[0]) && (line[0]!='0'[0])
127
+
128
+ # line doesn't finish in *
129
+ if (line[0]!='>'[0]) && (!(line =~ /\*$/))
130
+
127
131
  #puts line
128
132
  # puts line,line[0]
129
133
  if line =~ />([^\.]+)\.\.\.\s/
130
134
  #puts 'ok'
135
+ # puts $1
131
136
  @clusters[$1]=1
132
137
  end
133
138
  end
@@ -160,6 +160,16 @@ class Seqtrim
160
160
  only_workers=options[:only_workers]
161
161
  chunk_size = options[:chunk_size]
162
162
  use_json = options[:json]
163
+
164
+ # check for checkpoint
165
+
166
+ if File.exists?(ScbiMapreduce::CHECKPOINT_FILE)
167
+ if !options[:use_checkpoint]
168
+ STDERR.puts "ERROR: A checkpoint file exists, either delete it or provide -C flag to use it"
169
+ exit
170
+ end
171
+ end
172
+
163
173
 
164
174
 
165
175
  # it is the server part
@@ -172,13 +182,28 @@ class Seqtrim
172
182
 
173
183
  # open sequence reader and expand input files paths
174
184
  if options[:fastq]
185
+
175
186
  if options[:fastq]=='-'
176
187
  seqs_path = STDIN
177
188
  else
178
189
  seqs_path = File.expand_path(options[:fastq])
179
190
  end
191
+
180
192
  cd_hit_input_file = seqs_path
181
- sequence_reader = FastqFile.new(seqs_path,'r',:sanger, true)
193
+
194
+ # choose fastq quality format
195
+ format=:sanger
196
+
197
+ case options[:format]
198
+ when 'sanger'
199
+ format = :sanger
200
+ when 'illumina15'
201
+ format = :ilumina
202
+ when 'illumina18'
203
+ format = :sanger
204
+ end
205
+
206
+ sequence_reader = FastqFile.new(seqs_path,'r',format, true)
182
207
  # cd_hit_input_file = 'cd-hit-input.fasta'
183
208
  cd_hit_input_file = seqs_path
184
209
  # $LOG.info "Converting input file for cd-hit-454"
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.46'
33
+ VERSION = '2.0.48'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.46
5
+ version: 2.0.48
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-04-13 00:00:00 Z
13
+ date: 2012-05-16 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray