ms-sequest 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.0.5 / 2009-06-22
2
+
3
+ * fixed handling of files with print_duplicate_references = 0
4
+ * removes .hdr postfix on the fasta path for srf -> SQT output
1
5
 
2
6
  == 0.0.4 / 2009-06-18
3
7
 
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.4'
4
+ VERSION = '0.0.5'
5
5
  end
6
6
  end
@@ -148,19 +148,22 @@ class Ms::Sequest::Srf
148
148
  def from_file(filename, opts)
149
149
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
150
150
  params = Ms::Sequest::Srf.get_sequest_params(filename)
151
- dups_gt_0 = false
151
+ dup_references = 0
152
152
  if params
153
- dups = params.print_duplicate_references
154
- if dups == '0'
153
+ dup_references = params.print_duplicate_references.to_i
154
+ if dup_references == 0
155
155
  warn <<END
156
- ***************************************************************************
157
- For complete protein <=> peptide linkages, .srf files must be created with
158
- print_duplicate_references > 0. To capture all duplicate references, set the
159
- sequest parameter 'print_duplicate_references' to 100 or greater.
160
- ***************************************************************************
156
+ *****************************************************************************
157
+ WARNING: This srf file lists only 1 protein per peptide! (based on the
158
+ print_duplicate_references parameter in the sequest.params file used in its
159
+ creation) So, downstream output will likewise only contain a single protein
160
+ for each peptide hit. In many instances this is OK since downstream programs
161
+ will recalculate protein-to-peptide linkages from the database file anyway.
162
+ For complete protein lists per peptide hit, .srf files must be created with
163
+ print_duplicate_references > 0. HINT: to capture all duplicate references,
164
+ set the sequest parameter 'print_duplicate_references' to 100 or greater.
165
+ *****************************************************************************
161
166
  END
162
- else
163
- dups_gt_0 = true
164
167
  end
165
168
  else
166
169
  end
@@ -179,7 +182,7 @@ END
179
182
  end
180
183
  @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
181
184
 
182
- @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35)
185
+ @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35, dup_references)
183
186
  if fh.eof?
184
187
  #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
185
188
  @params = nil
@@ -257,10 +260,10 @@ END
257
260
 
258
261
  # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
259
262
  # will put the fh there.
260
- def read_out_files(fh,number_files, measured_mhs, unpack_35)
263
+ def read_out_files(fh,number_files, measured_mhs, unpack_35, dup_references)
261
264
  out_files = Array.new(number_files)
262
265
  header.num_dta_files.times do |i|
263
- out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35)
266
+ out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_references)
264
267
  end
265
268
  out_files
266
269
  end
@@ -467,7 +470,7 @@ class Ms::Sequest::Srf::Out
467
470
  "<Ms::Sequest::Srf::Out first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
468
471
  end
469
472
 
470
- def from_io(fh, unpack_35)
473
+ def from_io(fh, unpack_35, dup_references)
471
474
  ## EMPTY out file is 96 bytes
472
475
  ## each hit is 320 bytes
473
476
  ## num_hits and charge:
@@ -484,6 +487,7 @@ class Ms::Sequest::Srf::Out
484
487
  ar[i] = Ms::Sequest::Srf::Out::Pep.new.from_io(fh, unpack_35)
485
488
  num_extra_references += ar[i].num_other_loci
486
489
  end
490
+ num_extra_references = dup_references if num_extra_references > dup_references
487
491
  Ms::Sequest::Srf::Out::Pep.read_extra_references(fh, num_extra_references, ar)
488
492
  ## The xcorrs are already ordered by best to worst hit
489
493
  ## ADJUST the deltacn's to be meaningful for the top hit:
@@ -1,7 +1,9 @@
1
1
 
2
+ require 'tap/task'
2
3
  require 'ms/sequest/srf'
3
4
  require 'ms/mass'
4
5
 
6
+
5
7
  # These are for outputting formats used in MS/MS Search engines
6
8
 
7
9
  module Ms
@@ -1,5 +1,4 @@
1
1
  require 'tap/task'
2
- require 'configurable'
3
2
  require 'ms/sequest'
4
3
  require 'ms/sequest/srf'
5
4
  require 'ms/sequest/sqt'
@@ -59,8 +58,7 @@ module Ms
59
58
  'Comment' => ['Created from Bioworks .srf file']
60
59
  }
61
60
 
62
-
63
- db_filename = header.db_filename
61
+ db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
64
62
  db_filename_in_sqt = db_filename
65
63
  if opt[:new_db_path]
66
64
  db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-18 00:00:00 -06:00
12
+ date: 2009-06-22 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency