mspire 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/lib/spec_id/srf.rb
CHANGED
@@ -73,11 +73,57 @@ class DTALog
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
+
class SRFGroup
|
77
|
+
include SpecID
|
78
|
+
|
79
|
+
## the srf objects themselves
|
80
|
+
attr_accessor :srfs, :filenames
|
81
|
+
## also inherits :peps and :prots accessor
|
82
|
+
|
83
|
+
# takes an array of filenames
|
84
|
+
# or a single .srg filename
|
85
|
+
# see from_srg to load a single .srg file
|
86
|
+
def initialize(filenames=nil)
|
87
|
+
@filenames = filenames
|
88
|
+
@peps = []
|
89
|
+
@prots = []
|
90
|
+
@global_ref_hash = {}
|
91
|
+
@srfs = []
|
92
|
+
if filenames
|
93
|
+
if filenames.is_a?(String) && filenames =~ /\.srg$/
|
94
|
+
srg_filename = filenames.dup
|
95
|
+
@filename = srg_filename
|
96
|
+
filenames = IO.readlines(filenames).grep(/\w/).map {|v| v.chomp }
|
97
|
+
filenames.each do |file|
|
98
|
+
if !File.exist? file
|
99
|
+
puts "File: #{file} in #{srg_filename} does not exist!"
|
100
|
+
puts "Please modify #{srg_filename} to point to existing files."
|
101
|
+
abort
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
filenames.each do |file|
|
106
|
+
@srfs << SRF.new(file, @peps, @global_ref_hash)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# returns the filename used
|
112
|
+
def to_srg(srg_filename='bioworks.srg')
|
113
|
+
File.open(srg_filename, 'w') do |v|
|
114
|
+
@filenames.each do |srf_file|
|
115
|
+
v.puts File.expand_path(srf_file)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
srg_filename
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
76
122
|
class SRF
|
77
123
|
|
78
124
|
# a string 3.3 or 3.2
|
79
125
|
attr_accessor :version
|
80
|
-
|
126
|
+
|
81
127
|
attr_accessor :header
|
82
128
|
attr_accessor :dta_files
|
83
129
|
attr_accessor :out_files
|
@@ -85,6 +131,7 @@ class SRF
|
|
85
131
|
# a parallel array to dta_files and out_files where each entry is:
|
86
132
|
# [first_scan, last_scan, charge]
|
87
133
|
attr_accessor :index
|
134
|
+
attr_accessor :base_name
|
88
135
|
|
89
136
|
def dta_start_byte
|
90
137
|
case @version
|
@@ -93,26 +140,44 @@ class SRF
|
|
93
140
|
end
|
94
141
|
end
|
95
142
|
|
96
|
-
|
143
|
+
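# peps and global_ref_hash are caller-supplied accumulators: the peptide hits read from the file are appended to peps, and proteins are registered in global_ref_hash (keyed by reference)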
|
144
|
+
def initialize(filename=nil, peps=[], global_ref_hash={})
|
97
145
|
@dta_files = []
|
98
146
|
@out_files = []
|
99
147
|
if filename
|
100
|
-
from_file(filename)
|
148
|
+
from_file(filename, peps, global_ref_hash)
|
101
149
|
end
|
102
150
|
end
|
103
151
|
|
104
152
|
# returns self
|
105
|
-
def from_file(filename)
|
153
|
+
def from_file(filename, peps, global_ref_hash)
|
106
154
|
|
107
155
|
File.open(filename, "rb") do |fh|
|
108
156
|
@header = SRF::Header.new.from_handle(fh)
|
109
157
|
@version = @header.version
|
110
|
-
@dta_files = read_dta_files(fh,@header.num_dta_files)
|
111
|
-
@out_files = read_out_files(fh,@header.num_dta_files)
|
112
|
-
@params =
|
158
|
+
@dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files)
|
159
|
+
@out_files = read_out_files(fh,@header.num_dta_files, global_ref_hash, measured_mhs)
|
160
|
+
@params = Sequest::Params.new.parse_handle(fh)
|
113
161
|
fh.read(12) ## gap between last params entry and index
|
114
162
|
@index = read_scan_index(fh,@header.num_dta_files)
|
115
163
|
end
|
164
|
+
|
165
|
+
### UPDATE SOME THINGS ON SINGLE PASS:
|
166
|
+
@base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
|
167
|
+
# give each hit a base_name, first_scan, last_scan
|
168
|
+
@index.each_with_index do |ind,i|
|
169
|
+
mass_measured = @dta_files[i][0]
|
170
|
+
#puts @out_files[i].join(", ")
|
171
|
+
pep_hits = @out_files[i][3]
|
172
|
+
peps.push( *pep_hits )
|
173
|
+
pep_hits.each do |pep_hit|
|
174
|
+
pep_hit[13,3] = @base_name, *ind
|
175
|
+
# add the deltamass
|
176
|
+
pep_hit[10] = pep_hit[0] - mass_measured # real - measured (deltamass)
|
177
|
+
pep_hit[11] = 1.0e6 * pep_hit[10].abs / mass_measured ## ppm
|
178
|
+
pep_hit[17] = self ## link with the srf object
|
179
|
+
end
|
180
|
+
end
|
116
181
|
self
|
117
182
|
end
|
118
183
|
|
@@ -143,26 +208,26 @@ class SRF
|
|
143
208
|
|
144
209
|
# returns [dta_files, measured_mhs], where measured_mhs is a parallel array holding the mh (index 0) of each dta file
|
145
210
|
def read_dta_files(fh, num_files)
|
211
|
+
measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
|
146
212
|
dta_files = Array.new(num_files)
|
147
213
|
start = dta_start_byte
|
148
214
|
unless fh.pos == start
|
149
215
|
fh.pos = start
|
150
216
|
end
|
151
217
|
header.num_dta_files.times do |i|
|
152
|
-
|
218
|
+
dta_file = SRF::DTA.new.from_handle(fh)
|
219
|
+
measured_mhs[i] = dta_file[0]
|
220
|
+
dta_files[i] = dta_file
|
153
221
|
end
|
154
|
-
dta_files
|
222
|
+
[dta_files, measured_mhs]
|
155
223
|
end
|
156
224
|
|
157
225
|
# filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
|
158
226
|
# will put the fh there.
|
159
|
-
def read_out_files(fh,number_files)
|
227
|
+
def read_out_files(fh,number_files, global_ref_hash, measured_mhs)
|
160
228
|
out_files = Array.new(number_files)
|
161
229
|
header.num_dta_files.times do |i|
|
162
|
-
|
163
|
-
# abort
|
164
|
-
#end
|
165
|
-
out_files[i] = SRF::OUT.new.from_handle(fh)
|
230
|
+
out_files[i] = SRF::OUT.new.from_handle(fh, global_ref_hash)
|
166
231
|
end
|
167
232
|
out_files
|
168
233
|
end
|
@@ -261,45 +326,17 @@ class SRF::DTAGen
|
|
261
326
|
end
|
262
327
|
end
|
263
328
|
|
264
|
-
|
329
|
+
SRF::DTA = ArrayClass.new(%w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks))
|
330
|
+
|
331
|
+
class SRF::DTA
|
332
|
+
Unpack = "EeIvvvv"
|
265
333
|
|
266
334
|
# is this universal?
|
267
335
|
First_record_start_byte = 3644
|
268
336
|
|
269
|
-
|
270
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
271
|
-
ind_keys = {:mh => 0, :dta_tic => 1, :num_peaks => 2, :charge => 3, :ms_level => 4, :unknown => 5, :total_num_possible_charge_states => 6, :peaks => 7}
|
272
|
-
@@arr_size = ind_keys.size
|
273
|
-
|
274
|
-
def mh ; self[0] end ; def mh=(oth) ; self[0] = oth end
|
275
|
-
def dta_tic ; self[1] end ; def dta_tic=(oth) ; self[1] = oth end
|
276
|
-
def num_peaks ; self[2] end ; def num_peaks=(oth) ; self[2] = oth end
|
277
|
-
def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
|
278
|
-
def ms_level ; self[4] end ; def ms_level=(oth) ; self[4] = oth end
|
279
|
-
def unknown ; self[5] end ; def unknown=(oth) ; self[5] = oth end
|
280
|
-
def total_num_possible_charge_states ; self[6] end ; def total_num_possible_charge_states=(oth) ; self[6] = oth end
|
281
|
-
|
337
|
+
# note on peaks (self[7])
|
282
338
|
# this is a byte array of floats, you can get the peaks out with
|
283
339
|
# unpack("e*")
|
284
|
-
def peaks ; self[7] end
|
285
|
-
# this is a byte array of floats, you can get the peaks out with
|
286
|
-
def peaks=(oth) ; self[7] = oth end
|
287
|
-
|
288
|
-
@@arr_size = ind_keys.size
|
289
|
-
ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
|
290
|
-
ind_keys.merge!(ind_keys_w_eq)
|
291
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
292
|
-
|
293
|
-
def initialize(args=nil)
|
294
|
-
super(@@arr_size.size)
|
295
|
-
if args
|
296
|
-
if args.is_a? Hash
|
297
|
-
args.each do |k,v|
|
298
|
-
self[@@ind[k]] = v
|
299
|
-
end
|
300
|
-
end
|
301
|
-
end
|
302
|
-
end
|
303
340
|
|
304
341
|
def inspect
|
305
342
|
peaks_st = 'nil'
|
@@ -310,7 +347,7 @@ class SRF::DTA < Array
|
|
310
347
|
def from_handle(fh)
|
311
348
|
st = fh.read(24)
|
312
349
|
# get the bulk of the data in single unpack
|
313
|
-
self[0,7] = st.unpack(
|
350
|
+
self[0,7] = st.unpack(Unpack)
|
314
351
|
|
315
352
|
# Scan numbers possibly hidden in this next sequence of bytes (I think)
|
316
353
|
st2 = fh.read(24)
|
@@ -323,112 +360,99 @@ class SRF::DTA < Array
|
|
323
360
|
|
324
361
|
end
|
325
362
|
|
363
|
+
SRF::OUT = ArrayClass.new( %w(num_hits computer date_time hits) )
|
364
|
+
# 0=num_hits 1=computer 2=date_time 3=hits
|
326
365
|
|
327
|
-
class SRF::OUT
|
328
|
-
|
329
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
330
|
-
ind_keys = {:num_hits => 0, :charge => 1, :computer => 2, :date_time => 3, :first_scan => 4, :last_scan => 5, :filename_noext => 6, :hits => 7}
|
331
|
-
@@arr_size = ind_keys.size
|
332
|
-
|
333
|
-
def num_hits ; self[0] end ; def num_hits=(oth) ; self[0] = oth end
|
334
|
-
def charge ; self[1] end ; def charge=(oth) ; self[1] = oth end
|
335
|
-
def computer ; self[2] end ; def computer=(oth) ; self[2] = oth end
|
336
|
-
def date_time ; self[3] end ; def date_time=(oth) ; self[3] = oth end
|
337
|
-
def first_scan ; self[4] end ; def first_scan=(oth) ; self[4] = oth end
|
338
|
-
def last_scan ; self[5] end ; def last_scan=(oth) ; self[5] = oth end
|
339
|
-
def filename_noext ; self[6] end ; def filename_noext=(oth) ; self[6] = oth end
|
340
|
-
def hits ; self[7] end ; def hits=(oth) ; self[7] = oth end
|
341
|
-
|
342
|
-
@@arr_size = ind_keys.size
|
343
|
-
ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
|
344
|
-
ind_keys.merge!(ind_keys_w_eq)
|
345
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
346
|
-
|
347
|
-
def initialize(args=nil)
|
348
|
-
super(@@arr_size.size)
|
349
|
-
if args
|
350
|
-
if args.is_a? Hash
|
351
|
-
args.each do |k,v|
|
352
|
-
self[@@ind[k]] = v
|
353
|
-
end
|
354
|
-
end
|
355
|
-
end
|
356
|
-
end
|
366
|
+
class SRF::OUT
|
367
|
+
Unpack = '@36vx2Z*@60Z*'
|
357
368
|
|
358
369
|
def inspect
|
359
370
|
if first_scan
|
360
371
|
ins = "@first_scan=#{first_scan}, @last_scan=#{last_scan}, @filename_noext=#{filename_noext}, "
|
361
372
|
end
|
362
|
-
"<SRF::OUT @num_hits=#{num_hits}, @
|
373
|
+
"<SRF::OUT @num_hits=#{num_hits}, @computer=#{computer}, @date_time=#{date_time}, #{ins}@hits=#{hits.inspect}>"
|
363
374
|
end
|
364
375
|
|
365
|
-
def from_handle(fh)
|
376
|
+
def from_handle(fh, global_ref_hash)
|
366
377
|
## EMPTY out file is 96 bytes
|
367
378
|
## each hit is 320 bytes
|
368
379
|
## num_hits, computer and date_time:
|
369
380
|
st = fh.read(96)
|
370
|
-
self[0,
|
381
|
+
self[0,3] = st.unpack(Unpack)
|
371
382
|
num_hits = self[0]
|
372
383
|
|
373
384
|
ar = Array.new(num_hits)
|
374
|
-
|
375
|
-
|
385
|
+
if ar.size > 0
|
386
|
+
num_hits.times do |i|
|
387
|
+
ar[i] = SRF::OUT::Pep.new.from_handle(fh, global_ref_hash)
|
388
|
+
end
|
389
|
+
## The xcorrs are already ordered by best to worst hit
|
390
|
+
## ADJUST the deltacn's to be meaningful for the top hit:
|
391
|
+
## (the same as bioworks and prophet)
|
392
|
+
(1...ar.size).each {|i| ar[i-1].deltacn = ar[i].deltacn }
|
393
|
+
ar.last.deltacn = 1.1
|
376
394
|
end
|
377
|
-
self[
|
395
|
+
self[3] = ar
|
378
396
|
|
379
397
|
self
|
380
398
|
end
|
381
399
|
|
382
400
|
end
|
383
401
|
|
384
|
-
|
402
|
+
# deltacn is modified to be that of the next best hit (by xcorr).
|
403
|
+
# if there is no next best hit, then it will be 1.1 (like bioworks)
|
404
|
+
# mh is the theoretical mass + h
|
405
|
+
# prots are created as SRF prot objects with a reference and linked to their
|
406
|
+
# peptides (from global hash by reference)
|
407
|
+
# ppm = 10^6 * |∆m_accuracy| / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]
|
408
|
+
# This is calculated for the M+H mass!
|
409
|
+
# srf = the srf object this scan came from
|
410
|
+
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn sp xcorr id rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf) )
|
411
|
+
|
412
|
+
# 0=mh 1=deltacn 2=sp 3=xcorr 4=id 5=rsp 6=ions_matched 7=ions_total 8=sequence 9=prots 10=deltamass 11=ppm 12=aaseq 13=base_name 14=first_scan 15=last_scan 16=charge 17=srf
|
413
|
+
|
414
|
+
class SRF::OUT::Pep
|
415
|
+
include SpecID::Pep
|
416
|
+
|
417
|
+
Unpack = '@64Ex8ex12eeIx18vvvx8Z*@240Z*'
|
418
|
+
Unpack_four_null_bytes = 'a*'
|
419
|
+
Unpack_Zstar = 'Z*'
|
420
|
+
|
385
421
|
FourNullBytes_as_string = "\0\0\0\0"
|
386
422
|
#NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
|
387
423
|
NewRecordStart = 0x01.chr + 0x00.chr
|
388
424
|
Sequest_record_start = "[SEQUEST]"
|
389
425
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
@@arr_size = ind_keys.size
|
394
|
-
|
395
|
-
def mh ; self[0] end ; def mh=(oth) ; self[0] = oth end
|
396
|
-
def deltacn ; self[1] end ; def deltacn=(oth) ; self[1] = oth end
|
397
|
-
def sp ; self[2] end ; def sp=(oth) ; self[2] = oth end
|
398
|
-
def xcorr ; self[3] end ; def xcorr=(oth) ; self[3] = oth end
|
399
|
-
def id ; self[4] end ; def id=(oth) ; self[4] = oth end
|
400
|
-
def rsp ; self[5] end ; def rsp=(oth) ; self[5] = oth end
|
401
|
-
def ions_matched ; self[6] end ; def ions_matched=(oth) ; self[6] = oth end
|
402
|
-
def ions_total ; self[7] end ; def ions_total=(oth) ; self[7] = oth end
|
403
|
-
def peptide ; self[8] end ; def peptide=(oth) ; self[8] = oth end
|
404
|
-
def reference ; self[9] end ; def reference=(oth) ; self[9] = oth end
|
405
|
-
# The number of total proteins sharing this peptide
|
406
|
-
def num_tot_proteins ; self[10] end ; def num_tot_proteins=(oth) ; self[10] = oth end
|
407
|
-
|
408
|
-
def initialize(args=nil)
|
409
|
-
super(@@arr_size.size)
|
410
|
-
if args
|
411
|
-
if args.is_a? Hash
|
412
|
-
args.each do |k,v|
|
413
|
-
self[@@ind[k]] = v
|
414
|
-
end
|
415
|
-
end
|
416
|
-
end
|
417
|
-
end
|
426
|
+
tmp = $VERBOSE ; $VERBOSE = nil
|
427
|
+
def prots() self[9] end
|
428
|
+
$VERBOSE = tmp
|
418
429
|
|
419
430
|
def inspect
|
420
|
-
|
431
|
+
st = %w(aaseq sequence mh deltacn sp xcorr id rsp ions_matched ions_total prots deltamass ppm base_name first_scan last_scan charge).map do |v|
|
432
|
+
if v.is_a? Array
|
433
|
+
"##{v}=#{send(v.to_sym).size}"
|
434
|
+
else
|
435
|
+
"@#{v}=#{send(v.to_sym)}"
|
436
|
+
end
|
437
|
+
end
|
438
|
+
st.unshift("<#{self.class}")
|
439
|
+
if srf
|
440
|
+
st.push("@srf(base_name)=#{srf.base_name}")
|
441
|
+
end
|
442
|
+
st.push('>')
|
443
|
+
st.join(' ')
|
444
|
+
#"<SRF::OUT::Pep @mh=#{mh}, @deltacn=#{deltacn}, @sp=#{sp}, @xcorr=#{xcorr}, @id=#{id}, @rsp=#{rsp}, @ions_matched=#{ions_matched}, @ions_total=#{ions_total}, @sequence=#{sequence}, @prots(count)=#{prots.size}, @deltamass=#{deltamass}, @ppm=#{ppm} @aaseq=#{aaseq}, @base_name=#{base_name}, @first_scan=#{first_scan}, @last_scan=#{last_scan}, @charge=#{charge}, @srf(base_name)=#{srf.base_name}>"
|
421
445
|
end
|
422
446
|
|
423
447
|
## There must be a better way to do this.
|
424
448
|
## We are checking that there are no additional protein references only
|
425
449
|
## so that we are in register for the next reading
|
426
|
-
def read_extra_references(fh)
|
450
|
+
def read_extra_references(fh, global_ref_hash)
|
427
451
|
$SRF_OUT_HIT_FH_POS = fh.pos
|
428
452
|
st = fh.read(4)
|
429
453
|
#puts "HHH: " + st.unpack("H*").first
|
430
454
|
## if we see 0000 0000 we are done
|
431
|
-
if st.unpack(
|
455
|
+
if st.unpack(Unpack_four_null_bytes).first == FourNullBytes_as_string
|
432
456
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
433
457
|
return nil
|
434
458
|
end
|
@@ -436,20 +460,11 @@ class SRF::OUT::Hit < Array
|
|
436
460
|
|
437
461
|
## NOTE: in context of 4 bytes read above!
|
438
462
|
st = fh.read(36)
|
439
|
-
#p self
|
440
|
-
#puts "HHHH: " + st.unpack("H*").first
|
441
|
-
#puts st[34,2].unpack("H*").first
|
442
463
|
if st[34,2] == NewRecordStart
|
443
464
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
444
465
|
return nil
|
445
466
|
end
|
446
467
|
|
447
|
-
##if st.unpack("@22H*").first == NewRecordStart_as_hex
|
448
|
-
#if st[22,6] == NewRecordStart
|
449
|
-
# fh.pos = $SRF_OUT_HIT_FH_POS
|
450
|
-
# return nil
|
451
|
-
#end
|
452
|
-
|
453
468
|
# is this the end of the outfiles?
|
454
469
|
## BACK to beginning of this section
|
455
470
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
@@ -459,28 +474,67 @@ class SRF::OUT::Hit < Array
|
|
459
474
|
end
|
460
475
|
|
461
476
|
## we have extra references
|
462
|
-
|
463
|
-
fh.
|
464
|
-
|
465
|
-
|
477
|
+
## original read was fh.read(79)
|
478
|
+
fh.seek(-1, IO::SEEK_CUR)
|
479
|
+
self[9].push( new_protein(fh.read(80).unpack(Unpack_Zstar).first, self, global_ref_hash ) )
|
480
|
+
|
481
|
+
#p self.prots
|
482
|
+
#puts self.prots.size
|
466
483
|
#$glob ||= 0
|
467
484
|
#$glob += 1
|
468
|
-
#if $glob ==
|
485
|
+
#if $glob == 20
|
469
486
|
# abort
|
470
487
|
#end
|
471
488
|
|
472
|
-
read_extra_references(fh)
|
489
|
+
read_extra_references(fh,global_ref_hash)
|
473
490
|
end
|
474
491
|
|
492
|
+
def new_protein(reference, peptide, global_ref_hash)
|
493
|
+
if global_ref_hash.key? reference
|
494
|
+
global_ref_hash[reference].peps << peptide
|
495
|
+
else
|
496
|
+
global_ref_hash[reference] = SRF::OUT::Prot.new(reference, [peptide])
|
497
|
+
end
|
498
|
+
global_ref_hash[reference]
|
499
|
+
end
|
475
500
|
|
476
|
-
def from_handle(fh)
|
501
|
+
def from_handle(fh, global_ref_hash)
|
477
502
|
## get the first part of the info
|
478
503
|
st = fh.read(320) ## read all the hit data
|
479
|
-
self[0,10] = st.unpack(
|
480
|
-
|
481
|
-
|
504
|
+
self[0,10] = st.unpack(Unpack)
|
505
|
+
# we are slicing the reference to 38 chars to be the same length as
|
506
|
+
# duplicate references
|
507
|
+
self[9] = [new_protein(self[9][0,38], self, global_ref_hash)]
|
508
|
+
self[12] = SpecID::Pep.sequence_to_aaseq(self[8])
|
509
|
+
read_extra_references(fh, global_ref_hash)
|
510
|
+
|
482
511
|
self
|
483
512
|
end
|
484
513
|
|
485
514
|
end
|
486
515
|
|
516
|
+
SRF::OUT::Prot = ArrayClass.new( %w(reference peps) )
|
517
|
+
|
518
|
+
class SRF::OUT::Prot
|
519
|
+
include SpecID::Prot
|
520
|
+
|
521
|
+
tmp = $VERBOSE ; $VERBOSE = nil
|
522
|
+
def initialize(reference=nil, peps=[])
|
523
|
+
super(@@arr_size)
|
524
|
+
#@reference = reference
|
525
|
+
#@peps = peps
|
526
|
+
self[0,2] = reference, peps
|
527
|
+
end
|
528
|
+
$VERBOSE = tmp
|
529
|
+
|
530
|
+
# "<SRF::OUT::Prot reference=\"#{@reference}\">"
|
531
|
+
|
532
|
+
def inspect
|
533
|
+
"<SRF::OUT::Prot @reference=#{reference}, @peps(#)=#{peps.size}>"
|
534
|
+
end
|
535
|
+
end
|
536
|
+
|
537
|
+
|
538
|
+
|
539
|
+
|
540
|
+
|