ms-sequest 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.0.9 / 2009-09-08
2
+
3
+ * added capability to read srf files created by reading in .out/.dta folders (combined).
4
+ NOTE: please consider this functionality beta stage as it has not been extensively tested!
5
+ * cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
1
6
 
2
7
  == 0.0.8 / 2009-06-29
3
8
 
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.8'
4
+ VERSION = '0.0.9'
5
5
  end
6
6
  end
@@ -75,7 +75,7 @@ class Ms::Sequest::Params
75
75
  hash = {}
76
76
  in_add_amino_acid_section = false
77
77
  add_section_re = /^\s*add_/
78
- prev_pos = nil
78
+ prev_pos = nil
79
79
  while line = fh.gets
80
80
  if line =~ add_section_re
81
81
  in_add_amino_acid_section = true
@@ -94,11 +94,13 @@ class Ms::Sequest::Params
94
94
  hash
95
95
  end
96
96
 
97
- # returns self
97
+ # returns self or nil if no sequest found in the io
98
98
  def parse_io(fh)
99
99
  # seek to the SEQUEST file
100
100
  loop do
101
- if fh.gets =~ @@sequest_line
101
+ line = fh.gets
102
+ return nil if line.nil? # we return nil if we reach the end of the file without seeing sequest params
103
+ if line =~ @@sequest_line
102
104
  # double check that we are in a sequest params file:
103
105
  pos = fh.pos
104
106
  if fh.gets =~ /^first_database_name/
@@ -235,12 +237,12 @@ class Ms::Sequest::Params
235
237
  when :precursor : precursor_mass_type
236
238
  when :fragment : fragment_mass_type
237
239
  end
238
- case reply
239
- when 'average'
240
- Ms::Mass::AA::AVG
241
- when 'monoisotopic'
242
- Ms::Mass::AA::MONO
243
- end
240
+ case reply
241
+ when 'average'
242
+ Ms::Mass::AA::AVG
243
+ when 'monoisotopic'
244
+ Ms::Mass::AA::MONO
245
+ end
244
246
  end
245
247
 
246
248
  # at least in Bioworks 3.2, the First number after the enzyme
@@ -57,8 +57,7 @@ class Ms::Sequest::Srf
57
57
  handle.seek(params_start_index)
58
58
  Ms::Sequest::Params.new.parse_io(handle)
59
59
  else
60
- warn "#{filename} has no SEQUEST information, may be a truncated/corrupt file!"
61
- nil
60
+ nil # not found
62
61
  end
63
62
  end
64
63
  end
@@ -85,6 +84,9 @@ class Ms::Sequest::Srf
85
84
  # # searches then you probably want to set this to false to avoid
86
85
  # # recalculation.
87
86
  #
87
+ # :read_pephits => true | false (default true)
88
+ # # will attempt to read peptide hit information (equivalent to .out
89
+ # # files), otherwise, just reads the dta information.
88
90
  def initialize(filename=nil, opts={})
89
91
  @peps = []
90
92
 
@@ -143,10 +145,25 @@ class Ms::Sequest::Srf
143
145
  self
144
146
  end
145
147
 
148
+ def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
149
+ dta_files = Array.new(num_files)
150
+ out_files = Array.new(num_files)
151
+ start = dta_start_byte
152
+ fh.pos = start
153
+
154
+ num_files.times do |i|
155
+ dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
156
+ #p dta_files[i]
157
+ out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
158
+ #p out_files[i]
159
+ end
160
+ [dta_files, out_files]
161
+ end
162
+
146
163
  # returns self
147
164
  # opts are the same as for 'new'
148
165
  def from_file(filename, opts)
149
- opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
166
+ opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
150
167
  params = Ms::Sequest::Srf.get_sequest_params(filename)
151
168
  dup_references = 0
152
169
  dup_refs_gt_0 = false
@@ -169,7 +186,7 @@ END
169
186
  dup_refs_gt_0 = true
170
187
  end
171
188
  else
172
- warn "no params file found in srf, could be truncated file!"
189
+ warn "no params file found in srf, could be combined file or truncated/corrupt file!"
173
190
  end
174
191
 
175
192
  File.open(filename, 'rb') do |fh|
@@ -184,24 +201,44 @@ END
184
201
  when '3.5'
185
202
  true
186
203
  end
187
- @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
188
204
 
189
- @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35, dup_refs_gt_0)
190
- if fh.eof?
191
- #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
192
- @params = nil
193
- @index = []
205
+ if @header.combined
206
+ @base_name = File.basename(filename, '.*')
207
+ # I'm not sure why this is the case, but the reported number is too
208
+ # big by one on the 2 files I've seen so far, so we will correct it here!
209
+ @header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
210
+ if opts[:read_pephits] == false
211
+ raise NotImplementedError, "on combined files must read everything right now!"
212
+ end
213
+ (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
194
214
  else
195
- @params = Ms::Sequest::Params.new.parse_io(fh)
215
+ @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
216
+ @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
217
+ if opts[:read_pephits]
218
+ @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
219
+ if fh.eof?
220
+ #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
221
+ @params = nil
222
+ @index = []
223
+ end
224
+ end
225
+ end
226
+ start_pos_in_case = fh.pos
227
+ @params = Ms::Sequest::Params.new.parse_io(fh)
228
+ if @params.nil?
229
+ fh.pos = start_pos_in_case
230
+ # seek to the index
231
+ fh.scanf "\000\000\000\000"
232
+ else # we have a params file
196
233
  # This is very sensitive to the grab_params method in sequest params
197
234
  fh.read(12) ## gap between last params entry and index
198
- @index = read_scan_index(fh,@header.num_dta_files)
199
235
  end
236
+ @index = read_scan_index(fh,@header.num_dta_files)
237
+ #p @index
200
238
  end
201
239
 
202
240
 
203
241
  ### UPDATE SOME THINGS:
204
- @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
205
242
  # give each hit a base_name, first_scan, last_scan
206
243
  @index.each_with_index do |ind,i|
207
244
  mass_measured = @dta_files[i][0]
@@ -244,24 +281,19 @@ END
244
281
 
245
282
  # returns an array of dta_files
246
283
  def read_dta_files(fh, num_files, unpack_35)
247
- measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
248
284
  dta_files = Array.new(num_files)
249
285
  start = dta_start_byte
250
- unless fh.pos == start
251
- fh.pos = start
252
- end
286
+ fh.pos = start
253
287
 
254
288
  header.num_dta_files.times do |i|
255
- dta_file = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
256
- measured_mhs[i] = dta_file[0]
257
- dta_files[i] = dta_file
289
+ dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
258
290
  end
259
- [dta_files, measured_mhs]
291
+ dta_files
260
292
  end
261
293
 
262
294
  # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
263
295
  # will put the fh there.
264
- def read_out_files(fh,number_files, measured_mhs, unpack_35, dup_refs_gt_0)
296
+ def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
265
297
  out_files = Array.new(number_files)
266
298
  header.num_dta_files.times do |i|
267
299
  out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
@@ -312,6 +344,14 @@ class Ms::Sequest::Srf::Header
312
344
  attr_accessor :params_filename
313
345
  attr_accessor :sequest_log_filename
314
346
 
347
+
348
+ # true if this is a combined file, false if it represents a single file
349
+ # this is set by examining the DTAGen object for signs of a single file
350
+ attr_reader :combined
351
+
352
+ __chars_re = Regexp.escape( "\r\0" )
353
+ NEWLINE_OR_NULL_RE = /[#{__chars_re}]/o
354
+
315
355
  def num_dta_files
316
356
  @dta_gen.num_dta_files
317
357
  end
@@ -321,6 +361,11 @@ class Ms::Sequest::Srf::Header
321
361
  st = fh.read(4)
322
362
  @version = '3.' + st.unpack('I').first.to_s
323
363
  @dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)
364
+ # if the start_mass, end_mass, start_scan and end_scan are all zero, it's a
365
+ # combined srf file:
366
+ @combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
367
+ one == @dta_gen.send(two.to_sym)
368
+ end
324
369
 
325
370
  ## get the rest of the info
326
371
  byte_length = Byte_length.dup
@@ -328,19 +373,23 @@ class Ms::Sequest::Srf::Header
328
373
 
329
374
  fh.pos = Start_byte[:enzyme]
330
375
  [:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
331
- send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]) )
376
+ send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param], @combined))
332
377
  end
333
378
  self
334
379
  end
335
380
 
336
381
  private
337
- def get_null_padded_string(fh,bytes)
382
+ def get_null_padded_string(fh, bytes, combined=false)
338
383
  st = fh.read(bytes)
339
384
  # for empty declarations
340
385
  if st[0] == 0x000000
341
386
  return ''
342
387
  end
343
- st.rstrip!
388
+ if combined
389
+ st = st[ 0, st.index(NEWLINE_OR_NULL_RE) ]
390
+ else
391
+ st.rstrip!
392
+ end
344
393
  st
345
394
  end
346
395
 
@@ -497,6 +546,7 @@ class Ms::Sequest::Srf::Out
497
546
  Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(ar)
498
547
  end
499
548
  self[6] = ar
549
+ self[4].chomp!
500
550
  self
501
551
  end
502
552
 
@@ -1,4 +1,5 @@
1
1
  require 'tap/task'
2
+ require 'ms/calc'
2
3
  require 'ms/sequest'
3
4
  require 'ms/sequest/srf'
4
5
  require 'ms/sequest/sqt'
@@ -136,9 +137,9 @@ module Ms
136
137
  out_file_total_inten = out_file.total_inten
137
138
  out_file_lowest_sp = out_file.lowest_sp
138
139
  if opt[:round]
139
- dta_file_mh = round(dta_file_mh, mh_dp)
140
- out_file_total_inten = round(out_file_total_inten, tic_dp)
141
- out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
140
+ dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
141
+ out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
142
+ out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
142
143
  end
143
144
 
144
145
  out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
@@ -148,10 +149,10 @@ module Ms
148
149
  hit_xcorr = hit.xcorr
149
150
  hit_sp = hit.sp
150
151
  if opt[:round]
151
- hit_mh = round(hit_mh, mh_dp)
152
- hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
153
- hit_xcorr = round(hit_xcorr, xcorr_dp)
154
- hit_sp = round(hit_sp, sp_dp)
152
+ hit_mh = Ms::Calc.round(hit_mh, mh_dp)
153
+ hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
154
+ hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
155
+ hit_sp = Ms::Calc.round(hit_sp, sp_dp)
155
156
  end
156
157
  # note that the rank is determined by the order..
157
158
  out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-29 00:00:00 -06:00
12
+ date: 2009-09-08 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -30,7 +30,7 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.0.1
33
+ version: 0.0.2
34
34
  version:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: tap
@@ -55,8 +55,8 @@ dependencies:
55
55
  description: reads .SRF, .SQT and supports conversions
56
56
  email: jtprince@gmail.com
57
57
  executables:
58
- - srf_to_sqt.rb
59
58
  - srf_to_search.rb
59
+ - srf_to_sqt.rb
60
60
  extensions: []
61
61
 
62
62
  extra_rdoc_files:
@@ -64,12 +64,12 @@ extra_rdoc_files:
64
64
  - MIT-LICENSE
65
65
  - History
66
66
  files:
67
+ - lib/ms/sequest.rb
68
+ - lib/ms/sequest/sqt.rb
67
69
  - lib/ms/sequest/params.rb
68
- - lib/ms/sequest/srf/search.rb
69
70
  - lib/ms/sequest/srf/sqt.rb
71
+ - lib/ms/sequest/srf/search.rb
70
72
  - lib/ms/sequest/srf.rb
71
- - lib/ms/sequest/sqt.rb
72
- - lib/ms/sequest.rb
73
73
  - README
74
74
  - MIT-LICENSE
75
75
  - History