mspire 0.1.5 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +5 -2
- data/bin/bioworks_to_pepxml.rb +84 -40
- data/bin/fasta_shaker.rb +100 -0
- data/bin/filter_spec_id.rb +185 -23
- data/bin/gi2annot.rb +2 -110
- data/bin/id_class_anal.rb +31 -21
- data/bin/id_precision.rb +12 -8
- data/bin/{false_positive_rate.rb → precision.rb} +1 -1
- data/bin/protein_summary.rb +55 -62
- data/changelog.txt +34 -0
- data/lib/align.rb +0 -1
- data/lib/fasta.rb +88 -24
- data/lib/gi.rb +114 -0
- data/lib/roc.rb +64 -58
- data/lib/spec_id/aa_freqs.rb +166 -0
- data/lib/spec_id/bioworks.rb +5 -1
- data/lib/spec_id/precision.rb +427 -0
- data/lib/spec_id/proph.rb +2 -2
- data/lib/spec_id/sequest.rb +810 -113
- data/lib/spec_id/srf.rb +486 -0
- data/lib/spec_id.rb +107 -23
- data/release_notes.txt +11 -0
- data/script/estimate_fpr_by_cysteine.rb +226 -0
- data/script/filter-peps.rb +3 -3
- data/script/find_cysteine_background.rb +137 -0
- data/script/gen_database_searching.rb +11 -7
- data/script/genuine_tps_and_probs.rb +136 -0
- data/script/top_hit_per_scan.rb +5 -2
- data/test/tc_aa_freqs.rb +59 -0
- data/test/tc_bioworks.rb +6 -1
- data/test/tc_bioworks_to_pepxml.rb +25 -18
- data/test/tc_fasta.rb +81 -3
- data/test/tc_fasta_shaker.rb +147 -0
- data/test/tc_gi.rb +20 -0
- data/test/tc_id_class_anal.rb +9 -12
- data/test/tc_id_precision.rb +12 -11
- data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
- data/test/tc_protein_summary.rb +31 -22
- data/test/tc_roc.rb +95 -50
- data/test/tc_sequest.rb +212 -145
- data/test/tc_spec.rb +10 -5
- data/test/tc_spec_id.rb +0 -2
- data/test/tc_spec_id_xml.rb +36 -0
- data/test/tc_srf.rb +216 -0
- metadata +35 -21
- data/lib/spec_id/false_positive_rate.rb +0 -476
- data/test/tc_gi2annot.rb +0 -12
data/lib/spec_id/srf.rb
ADDED
@@ -0,0 +1,486 @@
|
|
1
|
+
require 'spec_id/sequest'
|
2
|
+
|
3
|
+
# Mixin with helpers for pulling fixed-width fields out of a binary stream.
module BinaryReader
  # First element of a one-character null string. Indexing a String gives an
  # Integer on Ruby 1.8 and a one-character String on later versions; the
  # comparison below indexes the data the same way, so it holds on both.
  Null_char = "\0"[0]

  # Reads +bytes+ bytes from +fh+ (which must already be positioned at the
  # start of the field) and returns the text with trailing padding removed.
  #
  # A field whose first byte is null is treated as an empty declaration and
  # yields '' regardless of what follows the leading null.
  def get_null_padded_string(fh, bytes)
    raw = fh.read(bytes)
    # for empty declarations
    return '' if raw[0] == Null_char

    # String#rstrip drops trailing null characters as well as whitespace
    raw.rstrip
  end
end
|
17
|
+
|
18
|
+
# class to extract information from <file>_dta.log files
class DTALog
  # Parses a _dta.log file and returns an array indexed by the dta file number
  # (starting at 0); each entry is [first_scan, last_scan, dta_filename_noext].
  #
  # This is now obsolete since the scan # index was found at the end of the
  # srf files (see SRF#read_scan_index).
  #
  # file:: path to the _dta.log file
  #
  # Raises EOFError if the file has fewer than 14 lines.
  #
  # NOTE(review): the whitespace inside the patterns below is reproduced as
  # found; confirm against a real _dta.log that no runs of spaces were lost.
  def self.dta_and_scans_by_dta_index(file)
    first_record_re = /^ m/
    scan_line_re = /scan: (\d+) - (\d+), Datafile: (.*?) (.*)/
    other_dta_re = /Datafile: (.*?) /
    # anchored so that only the extension is removed, never an interior ".dta"
    dta_ext_re = /\.dta\z/

    dta_index = []
    File.open(file) do |fh|
      # Fixed preamble: 10 lines, the "scan range" line and 3 more lines.
      # (The scan range was only ever used to presize the result array.)
      14.times { fh.readline }

      in_records = false
      fh.each do |line|
        # ignore everything before the first line that opens the record section
        unless in_records
          next unless line =~ first_record_re
          in_records = true
        end

        scan = scan_line_re.match(line)
        next unless scan

        first_scan = scan[1].to_i
        last_scan = scan[2].to_i
        dta_index << [first_scan, last_scan, scan[3].sub(dta_ext_re, '')]

        # a second dta file may be listed on the same line for the same scans
        other = other_dta_re.match(scan[4])
        dta_index << [first_scan, last_scan, other[1].sub(dta_ext_re, '')] if other
      end
    end
    dta_index
  end
end
|
75
|
+
|
76
|
+
# Reader for SRF files: a header, the dta records, the out records, the
# search params and a trailing scan index, read in that order by #from_file.
class SRF

  # a string 3.3 or 3.2
  attr_accessor :version

  attr_accessor :header
  attr_accessor :dta_files
  attr_accessor :out_files
  attr_accessor :params
  # a parallel array to dta_files and out_files where each entry is:
  # [first_scan, last_scan, charge]
  attr_accessor :index

  # Byte offset of the first dta record for the current version.
  # Returns nil when @version is neither '3.2' nor '3.3'.
  def dta_start_byte
    case @version
    when '3.2' then 3260
    when '3.3' then 3644
    end
  end

  # filename:: optional path to an srf file; when given it is read immediately
  def initialize(filename=nil)
    @dta_files = []
    @out_files = []
    from_file(filename) if filename
  end

  # Reads every section of the srf file at +filename+ into this object.
  # returns self
  def from_file(filename)
    File.open(filename, "rb") do |fh|
      @header = SRF::Header.new.from_handle(fh)
      @version = @header.version
      @dta_files = read_dta_files(fh, @header.num_dta_files)
      @out_files = read_out_files(fh, @header.num_dta_files)
      @params = SpecID::Sequest::Params.new.parse_handle(fh)
      fh.read(12)  ## gap between last params entry and index
      @index = read_scan_index(fh, @header.num_dta_files)
    end
    self
  end

  # Reads +num+ index records from +fh+ (which must be at the start of the
  # index) and returns an array where each entry is
  # [first_scan, last_scan, charge].
  def read_scan_index(fh, num)
    ind_len = 24
    # The rest of the format is read with little-endian directives, so 'V'
    # (32-bit little-endian) is used rather than the native-endian 'I'.
    # Only the first 12 bytes of each 24 byte record are decoded.
    unpack_string = 'VVV'
    st = '0' * ind_len  ## a 24 byte string to receive data
    Array.new(num) do
      fh.read(ind_len, st)
      st.unpack(unpack_string)
    end
  end

  # Overwrites the scan info of each out file from a _dta.log file.
  # DTALog.dta_and_scans_by_dta_index gives a zero indexed list where each
  # entry is [first_scan, last_scan, dta_filename]; entry i is written into
  # slots 4..6 of out file i.
  # returns self
  def update_out_scan_info_from_dta_log(dta_log)
    index = DTALog.dta_and_scans_by_dta_index(dta_log)
    @out_files.each_with_index do |ot, i|
      ot[4,3] = index[i]  # contingent on implementation of ot
    end
    self
  end

  # Positions +fh+ at the first dta record (see #dta_start_byte) and reads
  # +num_files+ consecutive records.
  # returns an array of dta_files
  def read_dta_files(fh, num_files)
    start = dta_start_byte
    fh.pos = start unless fh.pos == start
    Array.new(num_files) { SRF::DTA.new.from_handle(fh) }
  end

  # Reads +number_files+ consecutive out records.
  # filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
  # will put the fh there.
  def read_out_files(fh, number_files)
    Array.new(number_files) { SRF::OUT.new.from_handle(fh) }
  end

end
|
171
|
+
|
172
|
+
# The fixed-layout header at the start of an srf file.
class SRF::Header
  include BinaryReader

  # Byte offset at which each header field starts (version 3.3 layout).
  Start_byte = {
    :enzyme => 438,
    :ion_series => 694,
    :model => 950,
    :modifications => 982,
    :raw_filename => 1822,
    :db_filename => 2082,
    :dta_log_filename => 2602,
    :params_filename => 3122,
    :sequest_log_filename => 3382,
  }
  # Width in bytes of each header field.
  Byte_length = {
    :enzyme => 256,
    :ion_series => 256,
    :model => 32,
    :modifications => 840,
    :raw_filename => 260,
    :db_filename => 520,
    :dta_log_filename => 520,
    :params_filename => 260,
    :sequest_log_filename => 262, ## is this really 262?? or should be 260??
  }
  # Field widths that differ in version 3.2 files.
  Byte_length_v32 = {
    :modifications => 456,
  }

  # a string: '3.' followed by the integer stored in the first 4 bytes
  attr_accessor :version
  # a SRF::DTAGen object
  attr_accessor :dta_gen
  attr_accessor :enzyme
  attr_accessor :ion_series
  attr_accessor :model
  attr_accessor :modifications
  attr_accessor :raw_filename
  attr_accessor :db_filename
  attr_accessor :dta_log_filename
  attr_accessor :params_filename
  attr_accessor :sequest_log_filename

  # Number of dta files, as recorded in the dta generation params.
  def num_dta_files
    @dta_gen.num_dta_files
  end

  # sets fh to 0 and grabs the information it wants
  # returns self
  def from_handle(fh)
    # the version lives in the first 4 bytes, so make sure we start there
    fh.pos = 0 unless fh.pos == 0
    # 'V' = 32-bit little-endian, matching the other directives used on srf data
    @version = '3.' + fh.read(4).unpack('V').first.to_s
    @dta_gen = SRF::DTAGen.new.from_handle(fh)

    ## get the rest of the info
    byte_length = Byte_length.dup
    byte_length.merge!(Byte_length_v32) if @version == '3.2'

    # the fields are contiguous, so after seeking to the first one they are
    # read sequentially in file order
    fh.pos = Start_byte[:enzyme]
    [:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
      send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]))
    end
    self
  end

end
|
236
|
+
|
237
|
+
# the DTA Generation Params
# (read from the first 148 bytes of an srf file)
class SRF::DTAGen

  ## not sure if this is correct
  attr_accessor :start_time
  # group scan (not sure if this is correct)
  attr_accessor :start_mass
  attr_accessor :end_mass
  attr_accessor :num_dta_files
  attr_accessor :group_scan
  ## not sure if this is correct
  attr_accessor :min_group_count
  attr_accessor :min_ion_threshold
  #attr_accessor :intensity_threshold # can't find yet
  #attr_accessor :precursor_tolerance # can't find yet
  attr_accessor :start_scan
  attr_accessor :end_scan

  # Rewinds +fh+ to byte 0 if necessary, reads 148 bytes and fills in every
  # attribute from them.
  # returns self
  def from_handle(fh)
    fh.pos = 0 if fh.pos != 0
    st = fh.read(148)
    # Three little-endian floats followed by six 32-bit integers, separated by
    # skipped bytes. 'V' (32-bit little-endian) is used for the integers so the
    # result does not depend on the byte order of the host.
    (@start_time, @start_mass, @end_mass, @num_dta_files, @group_scan, @min_group_count, @min_ion_threshold, @start_scan, @end_scan) = st.unpack('x36ex12ex4ex48Vx12VVVVV')
    self
  end
end
|
263
|
+
|
264
|
+
# One dta record of an srf file, stored as an Array with named accessors.
class SRF::DTA < Array

  # is this universal?
  First_record_start_byte = 3644

  ## mucky details. This should be encapsulated into a class to inherit from, etc.
  # Array slot of each field.
  # :peaks is a byte string of floats; you can get the peaks out with
  # unpack("e*").
  ind_keys = {:mh => 0, :dta_tic => 1, :num_peaks => 2, :charge => 3, :ms_level => 4, :unknown => 5, :total_num_possible_charge_states => 6, :peaks => 7}
  @@arr_size = ind_keys.size

  # Slot lookup accepted by #initialize: every field name and its "name="
  # form, each as a Symbol and as a String.
  @@ind = {}
  ind_keys.each do |key, slot|
    [key, "#{key}=".to_sym].each do |name|
      @@ind[name] = slot
      @@ind[name.to_s] = slot
    end
  end

  # reader and writer for every field, e.g. #mh and #mh=
  ind_keys.each do |key, slot|
    define_method(key) { self[slot] }
    define_method("#{key}=") { |oth| self[slot] = oth }
  end

  # args:: optional Hash of field name => value (see the lookup table above
  #        for accepted key forms); anything that is not a Hash is ignored
  def initialize(args=nil)
    # one slot per field (this used to be the byte width of the Integer)
    super(@@arr_size)
    if args.is_a? Hash
      args.each do |k,v|
        self[@@ind[k]] = v
      end
    end
  end

  def inspect
    peaks_st = 'nil'
    if self[7] ; peaks_st = "[#{self[7].size} bytes]" end
    "<SRF::DTA @mh=#{mh} @dta_tic=#{dta_tic} @num_peaks=#{num_peaks} @charge=#{charge} @ms_level=#{ms_level} @total_num_possible_charge_states=#{total_num_possible_charge_states} @peaks=#{peaks_st} >"
  end

  # Reads one record starting at the current position of +fh+.
  # returns self
  def from_handle(fh)
    st = fh.read(24)
    # get the bulk of the data in single unpack
    # ('V' = 32-bit little-endian, consistent with the E/e/v directives)
    self[0,7] = st.unpack("EeVvvvv")

    # Scan numbers possibly hidden in this next sequence of bytes (I think)
    fh.read(24)

    # 8 bytes are read per peak and kept as an undecoded byte string
    self[7] = fh.read(num_peaks * 8)
    self
  end

end
|
325
|
+
|
326
|
+
|
327
|
+
# One out record of an srf file, stored as an Array with named accessors.
class SRF::OUT < Array
  ## mucky details. This should be encapsulated into a class to inherit from, etc.
  # Array slot of each field. Slots 4..6 are not filled by #from_handle;
  # SRF#update_out_scan_info_from_dta_log writes them.
  ind_keys = {:num_hits => 0, :charge => 1, :computer => 2, :date_time => 3, :first_scan => 4, :last_scan => 5, :filename_noext => 6, :hits => 7}
  @@arr_size = ind_keys.size

  # Slot lookup accepted by #initialize: every field name and its "name="
  # form, each as a Symbol and as a String.
  @@ind = {}
  ind_keys.each do |key, slot|
    [key, "#{key}=".to_sym].each do |name|
      @@ind[name] = slot
      @@ind[name.to_s] = slot
    end
  end

  # reader and writer for every field, e.g. #num_hits and #num_hits=
  ind_keys.each do |key, slot|
    define_method(key) { self[slot] }
    define_method("#{key}=") { |oth| self[slot] = oth }
  end

  # args:: optional Hash of field name => value (see the lookup table above
  #        for accepted key forms); anything that is not a Hash is ignored
  def initialize(args=nil)
    # one slot per field (this used to be the byte width of the Integer)
    super(@@arr_size)
    if args.is_a? Hash
      args.each do |k,v|
        self[@@ind[k]] = v
      end
    end
  end

  def inspect
    if first_scan
      ins = "@first_scan=#{first_scan}, @last_scan=#{last_scan}, @filename_noext=#{filename_noext}, "
    end
    "<SRF::OUT @num_hits=#{num_hits}, @charge=#{charge}, @computer=#{computer}, @date_time=#{date_time}, #{ins}@hits=#{hits.inspect}>"
  end

  # Reads one record, including all of its hits, starting at the current
  # position of +fh+.
  # returns self
  def from_handle(fh)
    ## EMPTY out file is 96 bytes
    ## each hit is 320 bytes
    ## num_hits and charge:
    st = fh.read(96)
    self[0,4] = st.unpack("@36vvZ*@60Z*")

    # the hits follow the 96 byte record header directly
    self[7] = Array.new(self[0]) { SRF::OUT::Hit.new.from_handle(fh) }

    self
  end

end
|
383
|
+
|
384
|
+
# One hit inside an out record, stored as an Array with named accessors.
class SRF::OUT::Hit < Array
  FourNullBytes_as_string = "\0\0\0\0"
  #NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
  NewRecordStart = 0x01.chr + 0x00.chr
  Sequest_record_start = "[SEQUEST]"

  ## mucky details. This should be encapsulated into a class to inherit from, etc.
  # Array slot of each field.
  # :num_tot_proteins is the number of total proteins sharing this peptide.
  ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9, :num_tot_proteins => 10 }
  @@arr_size = ind_keys.size

  # Slot lookup accepted by #initialize: every field name and its "name="
  # form, each as a Symbol and as a String. (This table used to be left
  # empty, so passing a Hash to #initialize always failed.)
  @@ind = {}
  ind_keys.each do |key, slot|
    [key, "#{key}=".to_sym].each do |name|
      @@ind[name] = slot
      @@ind[name.to_s] = slot
    end
  end

  # reader and writer for every field, e.g. #xcorr and #xcorr=
  ind_keys.each do |key, slot|
    define_method(key) { self[slot] }
    define_method("#{key}=") { |oth| self[slot] = oth }
  end

  # args:: optional Hash of field name => value (see the lookup table above
  #        for accepted key forms); anything that is not a Hash is ignored
  def initialize(args=nil)
    # one slot per field (this used to be the byte width of the Integer)
    super(@@arr_size)
    if args.is_a? Hash
      args.each do |k,v|
        self[@@ind[k]] = v
      end
    end
  end

  def inspect
    "<SRF::OUT::Hit @mh=#{mh}, @deltacn=#{deltacn}, @sp=#{sp}, @xcorr=#{xcorr}, @id=#{id}, @rsp=#{rsp}, @ions_matched=#{ions_matched}, @ions_total=#{ions_total}, @peptide=#{peptide}, @reference=#{reference}, @num_tot_proteins=#{num_tot_proteins}>"
  end

  ## There must be a better way to do this.
  ## We are checking that there are no additional protein references only
  ## so that we are in register for the next reading
  #
  # Consumes any extra protein reference blocks that follow a hit, adding one
  # to num_tot_proteins for each. On return +fh+ is positioned at the first
  # byte that does not belong to an extra reference. Always returns nil.
  def read_extra_references(fh)
    loop do
      block_start = fh.pos

      ## if we see 0000 0000 we are done (also stop if the data ran out)
      lead = fh.read(4)
      if lead.nil? || lead == FourNullBytes_as_string
        fh.pos = block_start
        return nil
      end

      ## NOTE: in context of 4 bytes read above!
      following = fh.read(36)
      if following.nil? || following[34,2] == NewRecordStart
        fh.pos = block_start
        return nil
      end

      # is this the end of the outfiles?
      ## BACK to beginning of this section
      fh.pos = block_start
      if fh.read(9) == Sequest_record_start
        fh.pos = block_start
        return nil
      end

      ## we have extra references
      self[10] += 1
      # together with the 9 bytes just read this skips an 88 byte block
      fh.read(79)
    end
  end

  # Reads one 320 byte hit plus any extra references that follow it.
  # returns self
  def from_handle(fh)
    ## get the first part of the info
    st = fh.read(320) ## read all the hit data
    # ('V' = 32-bit little-endian, consistent with the E/e/v directives)
    self[0,10] = st.unpack('@64Ex8ex12eeVx18vvvx8Z*@240Z*')
    self[10] = 1
    read_extra_references(fh)
    self
  end

end
|
486
|
+
|