ms-sequest 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.0.5 / 2009-06-22
2
+
3
+ * fixed handling of files with print_duplicate_references = 0
4
+ * removes .hdr postfix on the fasta path for srf -> SQT output
1
5
 
2
6
  == 0.0.4 / 2009-06-18
3
7
 
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.4'
4
+ VERSION = '0.0.5'
5
5
  end
6
6
  end
@@ -148,19 +148,22 @@ class Ms::Sequest::Srf
148
148
  def from_file(filename, opts)
149
149
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
150
150
  params = Ms::Sequest::Srf.get_sequest_params(filename)
151
- dups_gt_0 = false
151
+ dup_references = 0
152
152
  if params
153
- dups = params.print_duplicate_references
154
- if dups == '0'
153
+ dup_references = params.print_duplicate_references.to_i
154
+ if dup_references == 0
155
155
  warn <<END
156
- ***************************************************************************
157
- For complete protein <=> peptide linkages, .srf files must be created with
158
- print_duplicate_references > 0. To capture all duplicate references, set the
159
- sequest parameter 'print_duplicate_references' to 100 or greater.
160
- ***************************************************************************
156
+ *****************************************************************************
157
+ WARNING: This srf file lists only 1 protein per peptide! (based on the
158
+ print_duplicate_references parameter in the sequest.params file used in its
159
+ creation) So, downstream output will likewise only contain a single protein
160
+ for each peptide hit. In many instances this is OK since downstream programs
161
+ will recalculate protein-to-peptide linkages from the database file anyway.
162
+ For complete protein lists per peptide hit, .srf files must be created with
163
+ print_duplicate_references > 0. HINT: to capture all duplicate references,
164
+ set the sequest parameter 'print_duplicate_references' to 100 or greater.
165
+ *****************************************************************************
161
166
  END
162
- else
163
- dups_gt_0 = true
164
167
  end
165
168
  else
166
169
  end
@@ -179,7 +182,7 @@ END
179
182
  end
180
183
  @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
181
184
 
182
- @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35)
185
+ @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35, dup_references)
183
186
  if fh.eof?
184
187
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
185
188
  @params = nil
@@ -257,10 +260,10 @@ END
257
260
 
258
261
  # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
259
262
  # will put the fh there.
260
- def read_out_files(fh,number_files, measured_mhs, unpack_35)
263
+ def read_out_files(fh,number_files, measured_mhs, unpack_35, dup_references)
261
264
  out_files = Array.new(number_files)
262
265
  header.num_dta_files.times do |i|
263
- out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35)
266
+ out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_references)
264
267
  end
265
268
  out_files
266
269
  end
@@ -467,7 +470,7 @@ class Ms::Sequest::Srf::Out
467
470
  "<Ms::Sequest::Srf::Out first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
468
471
  end
469
472
 
470
- def from_io(fh, unpack_35)
473
+ def from_io(fh, unpack_35, dup_references)
471
474
  ## EMPTY out file is 96 bytes
472
475
  ## each hit is 320 bytes
473
476
  ## num_hits and charge:
@@ -484,6 +487,7 @@ class Ms::Sequest::Srf::Out
484
487
  ar[i] = Ms::Sequest::Srf::Out::Pep.new.from_io(fh, unpack_35)
485
488
  num_extra_references += ar[i].num_other_loci
486
489
  end
490
+ num_extra_references = dup_references if num_extra_references > dup_references
487
491
  Ms::Sequest::Srf::Out::Pep.read_extra_references(fh, num_extra_references, ar)
488
492
  ## The xcorrs are already ordered by best to worst hit
489
493
  ## ADJUST the deltacn's to be meaningful for the top hit:
@@ -1,7 +1,9 @@
1
1
 
2
+ require 'tap/task'
2
3
  require 'ms/sequest/srf'
3
4
  require 'ms/mass'
4
5
 
6
+
5
7
  # These are for outputting formats used in MS/MS Search engines
6
8
 
7
9
  module Ms
@@ -1,5 +1,4 @@
1
1
  require 'tap/task'
2
- require 'configurable'
3
2
  require 'ms/sequest'
4
3
  require 'ms/sequest/srf'
5
4
  require 'ms/sequest/sqt'
@@ -59,8 +58,7 @@ module Ms
59
58
  'Comment' => ['Created from Bioworks .srf file']
60
59
  }
61
60
 
62
-
63
- db_filename = header.db_filename
61
+ db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
64
62
  db_filename_in_sqt = db_filename
65
63
  if opt[:new_db_path]
66
64
  db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-18 00:00:00 -06:00
12
+ date: 2009-06-22 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency