ms-sequest 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +5 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/params.rb +11 -9
- data/lib/ms/sequest/srf.rb +75 -25
- data/lib/ms/sequest/srf/sqt.rb +8 -7
- metadata +7 -7
data/History
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== 0.0.9 / 2009-09-08
|
2
|
+
|
3
|
+
* added capability to read combined srf files (those created by reading in .out/.dta folders).
|
4
|
+
NOTE: please consider this functionality beta stage as it has not been extensively tested!
|
5
|
+
* cleaned up the read_dta_files function: it no longer collects measured_mhs, since that is done later
|
1
6
|
|
2
7
|
== 0.0.8 / 2009-06-29
|
3
8
|
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/params.rb
CHANGED
@@ -75,7 +75,7 @@ class Ms::Sequest::Params
|
|
75
75
|
hash = {}
|
76
76
|
in_add_amino_acid_section = false
|
77
77
|
add_section_re = /^\s*add_/
|
78
|
-
|
78
|
+
prev_pos = nil
|
79
79
|
while line = fh.gets
|
80
80
|
if line =~ add_section_re
|
81
81
|
in_add_amino_acid_section = true
|
@@ -94,11 +94,13 @@ class Ms::Sequest::Params
|
|
94
94
|
hash
|
95
95
|
end
|
96
96
|
|
97
|
-
# returns self
|
97
|
+
# returns self, or nil if no sequest params are found in the io
|
98
98
|
def parse_io(fh)
|
99
99
|
# seek to the SEQUEST file
|
100
100
|
loop do
|
101
|
-
|
101
|
+
line = fh.gets
|
102
|
+
return nil if line.nil? # we return nil if we reach the end of the file without seeing sequest params
|
103
|
+
if line =~ @@sequest_line
|
102
104
|
# double check that we are in a sequest params file:
|
103
105
|
pos = fh.pos
|
104
106
|
if fh.gets =~ /^first_database_name/
|
@@ -235,12 +237,12 @@ class Ms::Sequest::Params
|
|
235
237
|
when :precursor : precursor_mass_type
|
236
238
|
when :fragment : fragment_mass_type
|
237
239
|
end
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
240
|
+
case reply
|
241
|
+
when 'average'
|
242
|
+
Ms::Mass::AA::AVG
|
243
|
+
when 'monoisotopic'
|
244
|
+
Ms::Mass::AA::MONO
|
245
|
+
end
|
244
246
|
end
|
245
247
|
|
246
248
|
# at least in Bioworks 3.2, the First number after the enzyme
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -57,8 +57,7 @@ class Ms::Sequest::Srf
|
|
57
57
|
handle.seek(params_start_index)
|
58
58
|
Ms::Sequest::Params.new.parse_io(handle)
|
59
59
|
else
|
60
|
-
|
61
|
-
nil
|
60
|
+
nil # not found
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
@@ -85,6 +84,9 @@ class Ms::Sequest::Srf
|
|
85
84
|
# # searches then you probably want to set this to false to avoid
|
86
85
|
# # recalculation.
|
87
86
|
#
|
87
|
+
# :read_pephits => true | false (default true)
|
88
|
+
# # will attempt to read peptide hit information (equivalent to .out
|
89
|
+
# # files), otherwise, just reads the dta information.
|
88
90
|
def initialize(filename=nil, opts={})
|
89
91
|
@peps = []
|
90
92
|
|
@@ -143,10 +145,25 @@ class Ms::Sequest::Srf
|
|
143
145
|
self
|
144
146
|
end
|
145
147
|
|
148
|
+
def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
|
149
|
+
dta_files = Array.new(num_files)
|
150
|
+
out_files = Array.new(num_files)
|
151
|
+
start = dta_start_byte
|
152
|
+
fh.pos = start
|
153
|
+
|
154
|
+
num_files.times do |i|
|
155
|
+
dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
|
156
|
+
#p dta_files[i]
|
157
|
+
out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
|
158
|
+
#p out_files[i]
|
159
|
+
end
|
160
|
+
[dta_files, out_files]
|
161
|
+
end
|
162
|
+
|
146
163
|
# returns self
|
147
164
|
# opts are the same as for 'new'
|
148
165
|
def from_file(filename, opts)
|
149
|
-
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
|
166
|
+
opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
|
150
167
|
params = Ms::Sequest::Srf.get_sequest_params(filename)
|
151
168
|
dup_references = 0
|
152
169
|
dup_refs_gt_0 = false
|
@@ -169,7 +186,7 @@ END
|
|
169
186
|
dup_refs_gt_0 = true
|
170
187
|
end
|
171
188
|
else
|
172
|
-
warn "no params file found in srf, could be truncated file!"
|
189
|
+
warn "no params file found in srf, could be combined file or truncated/corrupt file!"
|
173
190
|
end
|
174
191
|
|
175
192
|
File.open(filename, 'rb') do |fh|
|
@@ -184,24 +201,44 @@ END
|
|
184
201
|
when '3.5'
|
185
202
|
true
|
186
203
|
end
|
187
|
-
@dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
|
188
204
|
|
189
|
-
@
|
190
|
-
|
191
|
-
#
|
192
|
-
|
193
|
-
@
|
205
|
+
if @header.combined
|
206
|
+
@base_name = File.basename(filename, '.*')
|
207
|
+
# I'm not sure why this is the case, but the reported number is too
|
208
|
+
# big by one on the 2 files I've seen so far, so we will correct it here!
|
209
|
+
@header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
|
210
|
+
if opts[:read_pephits] == false
|
211
|
+
raise NotImplementedError, "on combined files must read everything right now!"
|
212
|
+
end
|
213
|
+
(@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
|
194
214
|
else
|
195
|
-
@
|
215
|
+
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
216
|
+
@dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
|
217
|
+
if opts[:read_pephits]
|
218
|
+
@out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
|
219
|
+
if fh.eof?
|
220
|
+
#warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
|
221
|
+
@params = nil
|
222
|
+
@index = []
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
start_pos_in_case = fh.pos
|
227
|
+
@params = Ms::Sequest::Params.new.parse_io(fh)
|
228
|
+
if @params.nil?
|
229
|
+
fh.pos = start_pos_in_case
|
230
|
+
# seek to the index
|
231
|
+
fh.scanf "\000\000\000\000"
|
232
|
+
else # we have a params file
|
196
233
|
# This is very sensitive to the grab_params method in sequest params
|
197
234
|
fh.read(12) ## gap between last params entry and index
|
198
|
-
@index = read_scan_index(fh,@header.num_dta_files)
|
199
235
|
end
|
236
|
+
@index = read_scan_index(fh,@header.num_dta_files)
|
237
|
+
#p @index
|
200
238
|
end
|
201
239
|
|
202
240
|
|
203
241
|
### UPDATE SOME THINGS:
|
204
|
-
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
205
242
|
# give each hit a base_name, first_scan, last_scan
|
206
243
|
@index.each_with_index do |ind,i|
|
207
244
|
mass_measured = @dta_files[i][0]
|
@@ -244,24 +281,19 @@ END
|
|
244
281
|
|
245
282
|
# returns an array of dta_files
|
246
283
|
def read_dta_files(fh, num_files, unpack_35)
|
247
|
-
measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
|
248
284
|
dta_files = Array.new(num_files)
|
249
285
|
start = dta_start_byte
|
250
|
-
|
251
|
-
fh.pos = start
|
252
|
-
end
|
286
|
+
fh.pos = start
|
253
287
|
|
254
288
|
header.num_dta_files.times do |i|
|
255
|
-
|
256
|
-
measured_mhs[i] = dta_file[0]
|
257
|
-
dta_files[i] = dta_file
|
289
|
+
dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
|
258
290
|
end
|
259
|
-
|
291
|
+
dta_files
|
260
292
|
end
|
261
293
|
|
262
294
|
# filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
|
263
295
|
# will put the fh there.
|
264
|
-
def read_out_files(fh,number_files,
|
296
|
+
def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
|
265
297
|
out_files = Array.new(number_files)
|
266
298
|
header.num_dta_files.times do |i|
|
267
299
|
out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
|
@@ -312,6 +344,14 @@ class Ms::Sequest::Srf::Header
|
|
312
344
|
attr_accessor :params_filename
|
313
345
|
attr_accessor :sequest_log_filename
|
314
346
|
|
347
|
+
|
348
|
+
# true if this is a combined file, false if represents a single file
|
349
|
+
# this is set by examining the DTAGen object for signs of a single file
|
350
|
+
attr_reader :combined
|
351
|
+
|
352
|
+
__chars_re = Regexp.escape( "\r\0" )
|
353
|
+
NEWLINE_OR_NULL_RE = /[#{__chars_re}]/o
|
354
|
+
|
315
355
|
def num_dta_files
|
316
356
|
@dta_gen.num_dta_files
|
317
357
|
end
|
@@ -321,6 +361,11 @@ class Ms::Sequest::Srf::Header
|
|
321
361
|
st = fh.read(4)
|
322
362
|
@version = '3.' + st.unpack('I').first.to_s
|
323
363
|
@dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)
|
364
|
+
# if the start_mass, end_mass, start_scan and end_scan are all zero, it's a
|
365
|
+
# combined srf file:
|
366
|
+
@combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
|
367
|
+
one == @dta_gen.send(two.to_sym)
|
368
|
+
end
|
324
369
|
|
325
370
|
## get the rest of the info
|
326
371
|
byte_length = Byte_length.dup
|
@@ -328,19 +373,23 @@ class Ms::Sequest::Srf::Header
|
|
328
373
|
|
329
374
|
fh.pos = Start_byte[:enzyme]
|
330
375
|
[:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
|
331
|
-
send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]
|
376
|
+
send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param], @combined))
|
332
377
|
end
|
333
378
|
self
|
334
379
|
end
|
335
380
|
|
336
381
|
private
|
337
|
-
def get_null_padded_string(fh,bytes)
|
382
|
+
def get_null_padded_string(fh, bytes, combined=false)
|
338
383
|
st = fh.read(bytes)
|
339
384
|
# for empty declarations
|
340
385
|
if st[0] == 0x000000
|
341
386
|
return ''
|
342
387
|
end
|
343
|
-
|
388
|
+
if combined
|
389
|
+
st = st[ 0, st.index(NEWLINE_OR_NULL_RE) ]
|
390
|
+
else
|
391
|
+
st.rstrip!
|
392
|
+
end
|
344
393
|
st
|
345
394
|
end
|
346
395
|
|
@@ -497,6 +546,7 @@ class Ms::Sequest::Srf::Out
|
|
497
546
|
Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(ar)
|
498
547
|
end
|
499
548
|
self[6] = ar
|
549
|
+
self[4].chomp!
|
500
550
|
self
|
501
551
|
end
|
502
552
|
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'tap/task'
|
2
|
+
require 'ms/calc'
|
2
3
|
require 'ms/sequest'
|
3
4
|
require 'ms/sequest/srf'
|
4
5
|
require 'ms/sequest/sqt'
|
@@ -136,9 +137,9 @@ module Ms
|
|
136
137
|
out_file_total_inten = out_file.total_inten
|
137
138
|
out_file_lowest_sp = out_file.lowest_sp
|
138
139
|
if opt[:round]
|
139
|
-
dta_file_mh = round(dta_file_mh, mh_dp)
|
140
|
-
out_file_total_inten = round(out_file_total_inten, tic_dp)
|
141
|
-
out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
|
140
|
+
dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
|
141
|
+
out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
|
142
|
+
out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
|
142
143
|
end
|
143
144
|
|
144
145
|
out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
|
@@ -148,10 +149,10 @@ module Ms
|
|
148
149
|
hit_xcorr = hit.xcorr
|
149
150
|
hit_sp = hit.sp
|
150
151
|
if opt[:round]
|
151
|
-
hit_mh = round(hit_mh, mh_dp)
|
152
|
-
hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
|
153
|
-
hit_xcorr = round(hit_xcorr, xcorr_dp)
|
154
|
-
hit_sp = round(hit_sp, sp_dp)
|
152
|
+
hit_mh = Ms::Calc.round(hit_mh, mh_dp)
|
153
|
+
hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
|
154
|
+
hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
|
155
|
+
hit_sp = Ms::Calc.round(hit_sp, sp_dp)
|
155
156
|
end
|
156
157
|
# note that the rank is determined by the order..
|
157
158
|
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-08 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.0.
|
33
|
+
version: 0.0.2
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: tap
|
@@ -55,8 +55,8 @@ dependencies:
|
|
55
55
|
description: reads .SRF, .SQT and supports conversions
|
56
56
|
email: jtprince@gmail.com
|
57
57
|
executables:
|
58
|
-
- srf_to_sqt.rb
|
59
58
|
- srf_to_search.rb
|
59
|
+
- srf_to_sqt.rb
|
60
60
|
extensions: []
|
61
61
|
|
62
62
|
extra_rdoc_files:
|
@@ -64,12 +64,12 @@ extra_rdoc_files:
|
|
64
64
|
- MIT-LICENSE
|
65
65
|
- History
|
66
66
|
files:
|
67
|
+
- lib/ms/sequest.rb
|
68
|
+
- lib/ms/sequest/sqt.rb
|
67
69
|
- lib/ms/sequest/params.rb
|
68
|
-
- lib/ms/sequest/srf/search.rb
|
69
70
|
- lib/ms/sequest/srf/sqt.rb
|
71
|
+
- lib/ms/sequest/srf/search.rb
|
70
72
|
- lib/ms/sequest/srf.rb
|
71
|
-
- lib/ms/sequest/sqt.rb
|
72
|
-
- lib/ms/sequest.rb
|
73
73
|
- README
|
74
74
|
- MIT-LICENSE
|
75
75
|
- History
|