ms-sequest 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.0.9 / 2009-09-08
2
+
3
+ * added capability to read srf files created by reading in .out/.dta folders (combined).
4
+ NOTE: please consider this functionality beta stage as it has not been extensively tested!
5
+ * cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
1
6
 
2
7
  == 0.0.8 / 2009-06-29
3
8
 
data/lib/ms/sequest.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.8'
4
+ VERSION = '0.0.9'
5
5
  end
6
6
  end
@@ -75,7 +75,7 @@ class Ms::Sequest::Params
75
75
  hash = {}
76
76
  in_add_amino_acid_section = false
77
77
  add_section_re = /^\s*add_/
78
- prev_pos = nil
78
+ prev_pos = nil
79
79
  while line = fh.gets
80
80
  if line =~ add_section_re
81
81
  in_add_amino_acid_section = true
@@ -94,11 +94,13 @@ class Ms::Sequest::Params
94
94
  hash
95
95
  end
96
96
 
97
- # returns self
97
+ # returns self or nil if no sequest found in the io
98
98
  def parse_io(fh)
99
99
  # seek to the SEQUEST file
100
100
  loop do
101
- if fh.gets =~ @@sequest_line
101
+ line = fh.gets
102
+ return nil if line.nil? # we return nil if we reach the end of the file without seeing sequest params
103
+ if line =~ @@sequest_line
102
104
  # double check that we are in a sequest params file:
103
105
  pos = fh.pos
104
106
  if fh.gets =~ /^first_database_name/
@@ -235,12 +237,12 @@ class Ms::Sequest::Params
235
237
  when :precursor : precursor_mass_type
236
238
  when :fragment : fragment_mass_type
237
239
  end
238
- case reply
239
- when 'average'
240
- Ms::Mass::AA::AVG
241
- when 'monoisotopic'
242
- Ms::Mass::AA::MONO
243
- end
240
+ case reply
241
+ when 'average'
242
+ Ms::Mass::AA::AVG
243
+ when 'monoisotopic'
244
+ Ms::Mass::AA::MONO
245
+ end
244
246
  end
245
247
 
246
248
  # at least in Bioworks 3.2, the First number after the enzyme
@@ -57,8 +57,7 @@ class Ms::Sequest::Srf
57
57
  handle.seek(params_start_index)
58
58
  Ms::Sequest::Params.new.parse_io(handle)
59
59
  else
60
- warn "#{filename} has no SEQUEST information, may be a truncated/corrupt file!"
61
- nil
60
+ nil # not found
62
61
  end
63
62
  end
64
63
  end
@@ -85,6 +84,9 @@ class Ms::Sequest::Srf
85
84
  # # searches then you probably want to set this to false to avoid
86
85
  # # recalculation.
87
86
  #
87
+ # :read_pephits => true | false (default true)
88
+ # # will attempt to read peptide hit information (equivalent to .out
89
+ # # files), otherwise, just reads the dta information.
88
90
  def initialize(filename=nil, opts={})
89
91
  @peps = []
90
92
 
@@ -143,10 +145,25 @@ class Ms::Sequest::Srf
143
145
  self
144
146
  end
145
147
 
148
+ def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
149
+ dta_files = Array.new(num_files)
150
+ out_files = Array.new(num_files)
151
+ start = dta_start_byte
152
+ fh.pos = start
153
+
154
+ num_files.times do |i|
155
+ dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
156
+ #p dta_files[i]
157
+ out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
158
+ #p out_files[i]
159
+ end
160
+ [dta_files, out_files]
161
+ end
162
+
146
163
  # returns self
147
164
  # opts are the same as for 'new'
148
165
  def from_file(filename, opts)
149
- opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
166
+ opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
150
167
  params = Ms::Sequest::Srf.get_sequest_params(filename)
151
168
  dup_references = 0
152
169
  dup_refs_gt_0 = false
@@ -169,7 +186,7 @@ END
169
186
  dup_refs_gt_0 = true
170
187
  end
171
188
  else
172
- warn "no params file found in srf, could be truncated file!"
189
+ warn "no params file found in srf, could be combined file or truncated/corrupt file!"
173
190
  end
174
191
 
175
192
  File.open(filename, 'rb') do |fh|
@@ -184,24 +201,44 @@ END
184
201
  when '3.5'
185
202
  true
186
203
  end
187
- @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
188
204
 
189
- @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35, dup_refs_gt_0)
190
- if fh.eof?
191
- #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
192
- @params = nil
193
- @index = []
205
+ if @header.combined
206
+ @base_name = File.basename(filename, '.*')
207
+ # I'm not sure why this is the case, but the reported number is too
208
+ # big by one on the 2 files I've seen so far, so we will correct it here!
209
+ @header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
210
+ if opts[:read_pephits] == false
211
+ raise NotImplementedError, "on combined files must read everything right now!"
212
+ end
213
+ (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
194
214
  else
195
- @params = Ms::Sequest::Params.new.parse_io(fh)
215
+ @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
216
+ @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
217
+ if opts[:read_pephits]
218
+ @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
219
+ if fh.eof?
220
+ #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
221
+ @params = nil
222
+ @index = []
223
+ end
224
+ end
225
+ end
226
+ start_pos_in_case = fh.pos
227
+ @params = Ms::Sequest::Params.new.parse_io(fh)
228
+ if @params.nil?
229
+ fh.pos = start_pos_in_case
230
+ # seek to the index
231
+ fh.scanf "\000\000\000\000"
232
+ else # we have a params file
196
233
  # This is very sensitive to the grab_params method in sequest params
197
234
  fh.read(12) ## gap between last params entry and index
198
- @index = read_scan_index(fh,@header.num_dta_files)
199
235
  end
236
+ @index = read_scan_index(fh,@header.num_dta_files)
237
+ #p @index
200
238
  end
201
239
 
202
240
 
203
241
  ### UPDATE SOME THINGS:
204
- @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
205
242
  # give each hit a base_name, first_scan, last_scan
206
243
  @index.each_with_index do |ind,i|
207
244
  mass_measured = @dta_files[i][0]
@@ -244,24 +281,19 @@ END
244
281
 
245
282
  # returns an array of dta_files
246
283
  def read_dta_files(fh, num_files, unpack_35)
247
- measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
248
284
  dta_files = Array.new(num_files)
249
285
  start = dta_start_byte
250
- unless fh.pos == start
251
- fh.pos = start
252
- end
286
+ fh.pos = start
253
287
 
254
288
  header.num_dta_files.times do |i|
255
- dta_file = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
256
- measured_mhs[i] = dta_file[0]
257
- dta_files[i] = dta_file
289
+ dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
258
290
  end
259
- [dta_files, measured_mhs]
291
+ dta_files
260
292
  end
261
293
 
262
294
  # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
263
295
  # will put the fh there.
264
- def read_out_files(fh,number_files, measured_mhs, unpack_35, dup_refs_gt_0)
296
+ def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
265
297
  out_files = Array.new(number_files)
266
298
  header.num_dta_files.times do |i|
267
299
  out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
@@ -312,6 +344,14 @@ class Ms::Sequest::Srf::Header
312
344
  attr_accessor :params_filename
313
345
  attr_accessor :sequest_log_filename
314
346
 
347
+
348
+ # true if this is a combined file, false if it represents a single file
349
+ # this is set by examining the DTAGen object for signs of a single file
350
+ attr_reader :combined
351
+
352
+ __chars_re = Regexp.escape( "\r\0" )
353
+ NEWLINE_OR_NULL_RE = /[#{__chars_re}]/o
354
+
315
355
  def num_dta_files
316
356
  @dta_gen.num_dta_files
317
357
  end
@@ -321,6 +361,11 @@ class Ms::Sequest::Srf::Header
321
361
  st = fh.read(4)
322
362
  @version = '3.' + st.unpack('I').first.to_s
323
363
  @dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)
364
+ # if the start_mass end_mass start_scan and end_scan are all zero, it's a
365
+ # combined srf file:
366
+ @combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
367
+ one == @dta_gen.send(two.to_sym)
368
+ end
324
369
 
325
370
  ## get the rest of the info
326
371
  byte_length = Byte_length.dup
@@ -328,19 +373,23 @@ class Ms::Sequest::Srf::Header
328
373
 
329
374
  fh.pos = Start_byte[:enzyme]
330
375
  [:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
331
- send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]) )
376
+ send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param], @combined))
332
377
  end
333
378
  self
334
379
  end
335
380
 
336
381
  private
337
- def get_null_padded_string(fh,bytes)
382
+ def get_null_padded_string(fh, bytes, combined=false)
338
383
  st = fh.read(bytes)
339
384
  # for empty declarations
340
385
  if st[0] == 0x000000
341
386
  return ''
342
387
  end
343
- st.rstrip!
388
+ if combined
389
+ st = st[ 0, st.index(NEWLINE_OR_NULL_RE) ]
390
+ else
391
+ st.rstrip!
392
+ end
344
393
  st
345
394
  end
346
395
 
@@ -497,6 +546,7 @@ class Ms::Sequest::Srf::Out
497
546
  Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(ar)
498
547
  end
499
548
  self[6] = ar
549
+ self[4].chomp!
500
550
  self
501
551
  end
502
552
 
@@ -1,4 +1,5 @@
1
1
  require 'tap/task'
2
+ require 'ms/calc'
2
3
  require 'ms/sequest'
3
4
  require 'ms/sequest/srf'
4
5
  require 'ms/sequest/sqt'
@@ -136,9 +137,9 @@ module Ms
136
137
  out_file_total_inten = out_file.total_inten
137
138
  out_file_lowest_sp = out_file.lowest_sp
138
139
  if opt[:round]
139
- dta_file_mh = round(dta_file_mh, mh_dp)
140
- out_file_total_inten = round(out_file_total_inten, tic_dp)
141
- out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
140
+ dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
141
+ out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
142
+ out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
142
143
  end
143
144
 
144
145
  out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
@@ -148,10 +149,10 @@ module Ms
148
149
  hit_xcorr = hit.xcorr
149
150
  hit_sp = hit.sp
150
151
  if opt[:round]
151
- hit_mh = round(hit_mh, mh_dp)
152
- hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
153
- hit_xcorr = round(hit_xcorr, xcorr_dp)
154
- hit_sp = round(hit_sp, sp_dp)
152
+ hit_mh = Ms::Calc.round(hit_mh, mh_dp)
153
+ hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
154
+ hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
155
+ hit_sp = Ms::Calc.round(hit_sp, sp_dp)
155
156
  end
156
157
  # note that the rank is determined by the order..
157
158
  out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-29 00:00:00 -06:00
12
+ date: 2009-09-08 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -30,7 +30,7 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.0.1
33
+ version: 0.0.2
34
34
  version:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: tap
@@ -55,8 +55,8 @@ dependencies:
55
55
  description: reads .SRF, .SQT and supports conversions
56
56
  email: jtprince@gmail.com
57
57
  executables:
58
- - srf_to_sqt.rb
59
58
  - srf_to_search.rb
59
+ - srf_to_sqt.rb
60
60
  extensions: []
61
61
 
62
62
  extra_rdoc_files:
@@ -64,12 +64,12 @@ extra_rdoc_files:
64
64
  - MIT-LICENSE
65
65
  - History
66
66
  files:
67
+ - lib/ms/sequest.rb
68
+ - lib/ms/sequest/sqt.rb
67
69
  - lib/ms/sequest/params.rb
68
- - lib/ms/sequest/srf/search.rb
69
70
  - lib/ms/sequest/srf/sqt.rb
71
+ - lib/ms/sequest/srf/search.rb
70
72
  - lib/ms/sequest/srf.rb
71
- - lib/ms/sequest/sqt.rb
72
- - lib/ms/sequest.rb
73
73
  - README
74
74
  - MIT-LICENSE
75
75
  - History