ms-sequest 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/History +5 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/params.rb +11 -9
- data/lib/ms/sequest/srf.rb +75 -25
- data/lib/ms/sequest/srf/sqt.rb +8 -7
- metadata +7 -7
data/History
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== 0.0.9 / 2009-09-08
|
2
|
+
|
3
|
+
* added capability to read srf files created by reading in .out/.dta folders (combined).
|
4
|
+
NOTE: please consider this functionality beta stage as it has not been extensively tested!
|
5
|
+
* cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
|
1
6
|
|
2
7
|
== 0.0.8 / 2009-06-29
|
3
8
|
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/params.rb
CHANGED
@@ -75,7 +75,7 @@ class Ms::Sequest::Params
|
|
75
75
|
hash = {}
|
76
76
|
in_add_amino_acid_section = false
|
77
77
|
add_section_re = /^\s*add_/
|
78
|
-
|
78
|
+
prev_pos = nil
|
79
79
|
while line = fh.gets
|
80
80
|
if line =~ add_section_re
|
81
81
|
in_add_amino_acid_section = true
|
@@ -94,11 +94,13 @@ class Ms::Sequest::Params
|
|
94
94
|
hash
|
95
95
|
end
|
96
96
|
|
97
|
-
# returns self
|
97
|
+
# returns self or nil if no sequest found in the io
|
98
98
|
def parse_io(fh)
|
99
99
|
# seek to the SEQUEST file
|
100
100
|
loop do
|
101
|
-
|
101
|
+
line = fh.gets
|
102
|
+
return nil if line.nil? # we return nil if we reach the end of the file without seeing sequest params
|
103
|
+
if line =~ @@sequest_line
|
102
104
|
# double check that we are in a sequest params file:
|
103
105
|
pos = fh.pos
|
104
106
|
if fh.gets =~ /^first_database_name/
|
@@ -235,12 +237,12 @@ class Ms::Sequest::Params
|
|
235
237
|
when :precursor : precursor_mass_type
|
236
238
|
when :fragment : fragment_mass_type
|
237
239
|
end
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
240
|
+
case reply
|
241
|
+
when 'average'
|
242
|
+
Ms::Mass::AA::AVG
|
243
|
+
when 'monoisotopic'
|
244
|
+
Ms::Mass::AA::MONO
|
245
|
+
end
|
244
246
|
end
|
245
247
|
|
246
248
|
# at least in Bioworks 3.2, the First number after the enzyme
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -57,8 +57,7 @@ class Ms::Sequest::Srf
|
|
57
57
|
handle.seek(params_start_index)
|
58
58
|
Ms::Sequest::Params.new.parse_io(handle)
|
59
59
|
else
|
60
|
-
|
61
|
-
nil
|
60
|
+
nil # not found
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
@@ -85,6 +84,9 @@ class Ms::Sequest::Srf
|
|
85
84
|
# # searches then you probably want to set this to false to avoid
|
86
85
|
# # recalculation.
|
87
86
|
#
|
87
|
+
# :read_pephits => true | false (default true)
|
88
|
+
# # will attempt to read peptide hit information (equivalent to .out
|
89
|
+
# # files), otherwise, just reads the dta information.
|
88
90
|
def initialize(filename=nil, opts={})
|
89
91
|
@peps = []
|
90
92
|
|
@@ -143,10 +145,25 @@ class Ms::Sequest::Srf
|
|
143
145
|
self
|
144
146
|
end
|
145
147
|
|
148
|
+
def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
|
149
|
+
dta_files = Array.new(num_files)
|
150
|
+
out_files = Array.new(num_files)
|
151
|
+
start = dta_start_byte
|
152
|
+
fh.pos = start
|
153
|
+
|
154
|
+
num_files.times do |i|
|
155
|
+
dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
|
156
|
+
#p dta_files[i]
|
157
|
+
out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
|
158
|
+
#p out_files[i]
|
159
|
+
end
|
160
|
+
[dta_files, out_files]
|
161
|
+
end
|
162
|
+
|
146
163
|
# returns self
|
147
164
|
# opts are the same as for 'new'
|
148
165
|
def from_file(filename, opts)
|
149
|
-
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
|
166
|
+
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
150
167
|
params = Ms::Sequest::Srf.get_sequest_params(filename)
|
151
168
|
dup_references = 0
|
152
169
|
dup_refs_gt_0 = false
|
@@ -169,7 +186,7 @@ END
|
|
169
186
|
dup_refs_gt_0 = true
|
170
187
|
end
|
171
188
|
else
|
172
|
-
warn "no params file found in srf, could be truncated file!"
|
189
|
+
warn "no params file found in srf, could be combined file or truncated/corrupt file!"
|
173
190
|
end
|
174
191
|
|
175
192
|
File.open(filename, 'rb') do |fh|
|
@@ -184,24 +201,44 @@ END
|
|
184
201
|
when '3.5'
|
185
202
|
true
|
186
203
|
end
|
187
|
-
@dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
|
188
204
|
|
189
|
-
@
|
190
|
-
|
191
|
-
#
|
192
|
-
|
193
|
-
@
|
205
|
+
if @header.combined
|
206
|
+
@base_name = File.basename(filename, '.*')
|
207
|
+
# I'm not sure why this is the case, but the reported number is too
|
208
|
+
# big by one on the 2 files I've seen so far, so we will correct it here!
|
209
|
+
@header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
|
210
|
+
if opts[:read_pephits] == false
|
211
|
+
raise NotImplementedError, "on combined files must read everything right now!"
|
212
|
+
end
|
213
|
+
(@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
|
194
214
|
else
|
195
|
-
@
|
215
|
+
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
216
|
+
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
217
|
+
if opts[:read_pephits]
|
218
|
+
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
219
|
+
if fh.eof?
|
220
|
+
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
221
|
+
@params = nil
|
222
|
+
@index = []
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
start_pos_in_case = fh.pos
|
227
|
+
@params = Ms::Sequest::Params.new.parse_io(fh)
|
228
|
+
if @params.nil?
|
229
|
+
fh.pos = start_pos_in_case
|
230
|
+
# seek to the index
|
231
|
+
fh.scanf "\000\000\000\000"
|
232
|
+
else # we have a params file
|
196
233
|
# This is very sensitive to the grab_params method in sequest params
|
197
234
|
fh.read(12) ## gap between last params entry and index
|
198
|
-
@index = read_scan_index(fh,@header.num_dta_files)
|
199
235
|
end
|
236
|
+
@index = read_scan_index(fh,@header.num_dta_files)
|
237
|
+
#p @index
|
200
238
|
end
|
201
239
|
|
202
240
|
|
203
241
|
### UPDATE SOME THINGS:
|
204
|
-
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
205
242
|
# give each hit a base_name, first_scan, last_scan
|
206
243
|
@index.each_with_index do |ind,i|
|
207
244
|
mass_measured = @dta_files[i][0]
|
@@ -244,24 +281,19 @@ END
|
|
244
281
|
|
245
282
|
# returns an array of dta_files
|
246
283
|
def read_dta_files(fh, num_files, unpack_35)
|
247
|
-
measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
|
248
284
|
dta_files = Array.new(num_files)
|
249
285
|
start = dta_start_byte
|
250
|
-
|
251
|
-
fh.pos = start
|
252
|
-
end
|
286
|
+
fh.pos = start
|
253
287
|
|
254
288
|
header.num_dta_files.times do |i|
|
255
|
-
|
256
|
-
measured_mhs[i] = dta_file[0]
|
257
|
-
dta_files[i] = dta_file
|
289
|
+
dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
|
258
290
|
end
|
259
|
-
|
291
|
+
dta_files
|
260
292
|
end
|
261
293
|
|
262
294
|
# filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
|
263
295
|
# will put the fh there.
|
264
|
-
def read_out_files(fh,number_files,
|
296
|
+
def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
|
265
297
|
out_files = Array.new(number_files)
|
266
298
|
header.num_dta_files.times do |i|
|
267
299
|
out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
|
@@ -312,6 +344,14 @@ class Ms::Sequest::Srf::Header
|
|
312
344
|
attr_accessor :params_filename
|
313
345
|
attr_accessor :sequest_log_filename
|
314
346
|
|
347
|
+
|
348
|
+
# true if this is a combined file, false if it represents a single file
|
349
|
+
# this is set by examining the DTAGen object for signs of a single file
|
350
|
+
attr_reader :combined
|
351
|
+
|
352
|
+
__chars_re = Regexp.escape( "\r\0" )
|
353
|
+
NEWLINE_OR_NULL_RE = /[#{__chars_re}]/o
|
354
|
+
|
315
355
|
def num_dta_files
|
316
356
|
@dta_gen.num_dta_files
|
317
357
|
end
|
@@ -321,6 +361,11 @@ class Ms::Sequest::Srf::Header
|
|
321
361
|
st = fh.read(4)
|
322
362
|
@version = '3.' + st.unpack('I').first.to_s
|
323
363
|
@dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)
|
364
|
+
# if the start_mass end_mass start_scan and end_scan are all zero, it's a
|
365
|
+
# combined srf file:
|
366
|
+
@combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
|
367
|
+
one == @dta_gen.send(two.to_sym)
|
368
|
+
end
|
324
369
|
|
325
370
|
## get the rest of the info
|
326
371
|
byte_length = Byte_length.dup
|
@@ -328,19 +373,23 @@ class Ms::Sequest::Srf::Header
|
|
328
373
|
|
329
374
|
fh.pos = Start_byte[:enzyme]
|
330
375
|
[:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
|
331
|
-
send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]
|
376
|
+
send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param], @combined))
|
332
377
|
end
|
333
378
|
self
|
334
379
|
end
|
335
380
|
|
336
381
|
private
|
337
|
-
def get_null_padded_string(fh,bytes)
|
382
|
+
def get_null_padded_string(fh, bytes, combined=false)
|
338
383
|
st = fh.read(bytes)
|
339
384
|
# for empty declarations
|
340
385
|
if st[0] == 0x000000
|
341
386
|
return ''
|
342
387
|
end
|
343
|
-
|
388
|
+
if combined
|
389
|
+
st = st[ 0, st.index(NEWLINE_OR_NULL_RE) ]
|
390
|
+
else
|
391
|
+
st.rstrip!
|
392
|
+
end
|
344
393
|
st
|
345
394
|
end
|
346
395
|
|
@@ -497,6 +546,7 @@ class Ms::Sequest::Srf::Out
|
|
497
546
|
Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(ar)
|
498
547
|
end
|
499
548
|
self[6] = ar
|
549
|
+
self[4].chomp!
|
500
550
|
self
|
501
551
|
end
|
502
552
|
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'tap/task'
|
2
|
+
require 'ms/calc'
|
2
3
|
require 'ms/sequest'
|
3
4
|
require 'ms/sequest/srf'
|
4
5
|
require 'ms/sequest/sqt'
|
@@ -136,9 +137,9 @@ module Ms
|
|
136
137
|
out_file_total_inten = out_file.total_inten
|
137
138
|
out_file_lowest_sp = out_file.lowest_sp
|
138
139
|
if opt[:round]
|
139
|
-
dta_file_mh = round(dta_file_mh, mh_dp)
|
140
|
-
out_file_total_inten = round(out_file_total_inten, tic_dp)
|
141
|
-
out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
|
140
|
+
dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
|
141
|
+
out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
|
142
|
+
out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
|
142
143
|
end
|
143
144
|
|
144
145
|
out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
|
@@ -148,10 +149,10 @@ module Ms
|
|
148
149
|
hit_xcorr = hit.xcorr
|
149
150
|
hit_sp = hit.sp
|
150
151
|
if opt[:round]
|
151
|
-
hit_mh = round(hit_mh, mh_dp)
|
152
|
-
hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
|
153
|
-
hit_xcorr = round(hit_xcorr, xcorr_dp)
|
154
|
-
hit_sp = round(hit_sp, sp_dp)
|
152
|
+
hit_mh = Ms::Calc.round(hit_mh, mh_dp)
|
153
|
+
hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
|
154
|
+
hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
|
155
|
+
hit_sp = Ms::Calc.round(hit_sp, sp_dp)
|
155
156
|
end
|
156
157
|
# note that the rank is determined by the order..
|
157
158
|
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-08 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.0.
|
33
|
+
version: 0.0.2
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: tap
|
@@ -55,8 +55,8 @@ dependencies:
|
|
55
55
|
description: reads .SRF, .SQT and supports conversions
|
56
56
|
email: jtprince@gmail.com
|
57
57
|
executables:
|
58
|
-
- srf_to_sqt.rb
|
59
58
|
- srf_to_search.rb
|
59
|
+
- srf_to_sqt.rb
|
60
60
|
extensions: []
|
61
61
|
|
62
62
|
extra_rdoc_files:
|
@@ -64,12 +64,12 @@ extra_rdoc_files:
|
|
64
64
|
- MIT-LICENSE
|
65
65
|
- History
|
66
66
|
files:
|
67
|
+
- lib/ms/sequest.rb
|
68
|
+
- lib/ms/sequest/sqt.rb
|
67
69
|
- lib/ms/sequest/params.rb
|
68
|
-
- lib/ms/sequest/srf/search.rb
|
69
70
|
- lib/ms/sequest/srf/sqt.rb
|
71
|
+
- lib/ms/sequest/srf/search.rb
|
70
72
|
- lib/ms/sequest/srf.rb
|
71
|
-
- lib/ms/sequest/sqt.rb
|
72
|
-
- lib/ms/sequest.rb
|
73
73
|
- README
|
74
74
|
- MIT-LICENSE
|
75
75
|
- History
|