mspire 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/lib/spec_id/srf.rb
CHANGED
@@ -73,11 +73,57 @@ class DTALog
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
+
# Loads several SRF files as one group: every SRF is handed the same peps
# array and the same protein-reference hash, so hits from all files are pooled.
class SRFGroup
  include SpecID

  ## the srf objects themselves, and the srf filenames they were read from
  attr_accessor :srfs, :filenames
  ## also inherits :peps and :prots accessor
  # NOTE(review): presumably those accessors come from SpecID -- confirm.
  # @prots is only initialised (empty) in this class; nothing here fills it.

  # filenames:: an array of srf filenames, or a single .srg filename (a text
  #             file listing one srf path per line).
  #
  # When a .srg file is given, @filenames ends up holding the srf paths listed
  # in it (not the .srg path itself), so that #to_srg can write them back out.
  # The .srg path is remembered in @filename.
  def initialize(filenames=nil)
    @filenames = filenames
    @peps = []
    @prots = []
    @global_ref_hash = {}
    @srfs = []
    return unless filenames

    if filenames.is_a?(String) && filenames =~ /\.srg$/
      @filename = filenames.dup
      @filenames = read_srg(@filename)
    end
    @filenames.each do |file|
      @srfs << SRF.new(file, @peps, @global_ref_hash)
    end
  end

  # Writes the full (expanded) path of each srf file, one per line, to
  # srg_filename.
  # returns the filename used
  def to_srg(srg_filename='bioworks.srg')
    File.open(srg_filename, 'w') do |out|
      @filenames.each do |srf_file|
        out.puts File.expand_path(srf_file)
      end
    end
    srg_filename
  end

  private

  # Returns the srf paths listed in srg_filename (lines without any word
  # character are ignored).  Prints a message and aborts the program if a
  # listed file does not exist.
  def read_srg(srg_filename)
    listed = IO.readlines(srg_filename).grep(/\w/).map {|line| line.chomp }
    listed.each do |file|
      next if File.exist? file
      puts "File: #{file} in #{srg_filename} does not exist!"
      puts "Please modify #{srg_filename} to point to existing files."
      abort
    end
    listed
  end
end
|
121
|
+
|
76
122
|
class SRF
|
77
123
|
|
78
124
|
# a string 3.3 or 3.2
|
79
125
|
attr_accessor :version
|
80
|
-
|
126
|
+
|
81
127
|
attr_accessor :header
|
82
128
|
attr_accessor :dta_files
|
83
129
|
attr_accessor :out_files
|
@@ -85,6 +131,7 @@ class SRF
|
|
85
131
|
# a parallel array to dta_files and out_files where each entry is:
|
86
132
|
# [first_scan, last_scan, charge]
|
87
133
|
attr_accessor :index
|
134
|
+
attr_accessor :base_name
|
88
135
|
|
89
136
|
def dta_start_byte
|
90
137
|
case @version
|
@@ -93,26 +140,44 @@ class SRF
|
|
93
140
|
end
|
94
141
|
end
|
95
142
|
|
96
|
-
|
143
|
+
# filename::        srf file to read right away (nothing is read when nil)
# peps::            array handed on to from_file, which appends this file's
#                   peptide hits to it
# global_ref_hash:: hash handed on to from_file (and from there to the out
#                   file readers)
def initialize(filename=nil, peps=[], global_ref_hash={})
  @dta_files, @out_files = [], []
  from_file(filename, peps, global_ref_hash) if filename
end
|
103
151
|
|
104
152
|
# returns self
|
105
|
-
# Reads a whole srf file: header, dta files, out files, sequest params and
# the scan index.  Afterwards every peptide hit is appended to +peps+ and
# annotated in place with:
#   [13] base_name, [14] first_scan, [15] last_scan, [16] charge
#   [10] deltamass (hit mh - measured mh), [11] ppm, [17] this srf object
#
# filename::        path of the srf file
# peps::            array that receives all peptide hits of this file
# global_ref_hash:: passed through to read_out_files
#
# returns self
def from_file(filename, peps, global_ref_hash)

  File.open(filename, "rb") do |fh|
    @header = SRF::Header.new.from_handle(fh)
    @version = @header.version
    @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files)
    @out_files = read_out_files(fh,@header.num_dta_files, global_ref_hash, measured_mhs)
    @params = Sequest::Params.new.parse_handle(fh)
    fh.read(12)  ## gap between last params entry and index
    @index = read_scan_index(fh,@header.num_dta_files)
  end

  ### UPDATE SOME THINGS ON SINGLE PASS:
  # base_name is the last path component of the raw filename (either kind of
  # slash) without its .RAW extension.  The extension is matched in any case
  # and a bare filename without a directory is accepted; if there is no .raw
  # extension at all we fall back to the basename minus whatever extension
  # it has, rather than failing on nil.
  raw_path = @header.raw_filename
  raw_match = raw_path.match(/(?:\A|[\\\/])([^\\\/]+)\.raw$/i)
  @base_name = raw_match ? raw_match[1] : File.basename(raw_path.tr('\\', '/'), '.*')

  # give each hit a base_name, first_scan, last_scan
  @index.each_with_index do |ind,i|
    first_scan, last_scan, charge = ind
    mass_measured = @dta_files[i][0]
    pep_hits = @out_files[i][3]
    # concat instead of push(*hits): no huge argument list for big files
    peps.concat(pep_hits)
    pep_hits.each do |pep_hit|
      # four values into four slots (13..16) so the array is never resized
      pep_hit[13,4] = @base_name, first_scan, last_scan, charge
      # add the deltamass
      pep_hit[10] = pep_hit[0] - mass_measured  # real - measured (deltamass)
      pep_hit[11] = 1.0e6 * pep_hit[10].abs / mass_measured  ## ppm
      pep_hit[17] = self  ## link with the srf object
    end
  end
  self
end
|
118
183
|
|
@@ -143,26 +208,26 @@ class SRF
|
|
143
208
|
|
144
209
|
# Moves fh to dta_start_byte (if it is not already there) and reads
# num_files consecutive dta records from it.
#
# returns [dta_files, measured_mhs] where measured_mhs is a parallel array
# holding element 0 (the mh) of each dta file
def read_dta_files(fh, num_files)
  start = dta_start_byte
  fh.pos = start unless fh.pos == start
  # honour the num_files argument (the arrays used to be sized by it while
  # the loop count came from the header)
  dta_files = Array.new(num_files) { SRF::DTA.new.from_handle(fh) }
  measured_mhs = dta_files.map {|dta_file| dta_file[0] }  ## A parallel array to capture the actual mh
  [dta_files, measured_mhs]
end
|
156
224
|
|
157
225
|
# filehandle (fh) must be at the start of the outfiles. 'read_dta_files'
|
158
226
|
# will put the fh there.
|
159
|
-
# Reads number_files consecutive out files from fh, passing global_ref_hash
# on to each SRF::OUT#from_handle call.
# measured_mhs is accepted for the caller's convenience but is not used here.
#
# returns the array of out files, in file order
def read_out_files(fh,number_files, global_ref_hash, measured_mhs)
  # honour the number_files argument (the array used to be sized by it while
  # the loop count came from the header)
  Array.new(number_files) { SRF::OUT.new.from_handle(fh, global_ref_hash) }
end
|
@@ -261,45 +326,17 @@ class SRF::DTAGen
|
|
261
326
|
end
|
262
327
|
end
|
263
328
|
|
264
|
-
|
329
|
+
SRF::DTA = ArrayClass.new(%w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks))
|
330
|
+
|
331
|
+
class SRF::DTA
|
332
|
+
Unpack = "EeIvvvv"
|
265
333
|
|
266
334
|
# is this universal?
|
267
335
|
First_record_start_byte = 3644
|
268
336
|
|
269
|
-
|
270
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
271
|
-
ind_keys = {:mh => 0, :dta_tic => 1, :num_peaks => 2, :charge => 3, :ms_level => 4, :unknown => 5, :total_num_possible_charge_states => 6, :peaks => 7}
|
272
|
-
@@arr_size = ind_keys.size
|
273
|
-
|
274
|
-
def mh ; self[0] end ; def mh=(oth) ; self[0] = oth end
|
275
|
-
def dta_tic ; self[1] end ; def dta_tic=(oth) ; self[1] = oth end
|
276
|
-
def num_peaks ; self[2] end ; def num_peaks=(oth) ; self[2] = oth end
|
277
|
-
def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
|
278
|
-
def ms_level ; self[4] end ; def ms_level=(oth) ; self[4] = oth end
|
279
|
-
def unknown ; self[5] end ; def unknown=(oth) ; self[5] = oth end
|
280
|
-
def total_num_possible_charge_states ; self[6] end ; def total_num_possible_charge_states=(oth) ; self[6] = oth end
|
281
|
-
|
337
|
+
# note on peaks (self[7])
|
282
338
|
# this is a byte array of floats, you can get the peaks out with
|
283
339
|
# unpack("e*")
|
284
|
-
def peaks ; self[7] end
|
285
|
-
# this is a byte array of floats, you can get the peaks out with
|
286
|
-
def peaks=(oth) ; self[7] = oth end
|
287
|
-
|
288
|
-
@@arr_size = ind_keys.size
|
289
|
-
ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
|
290
|
-
ind_keys.merge!(ind_keys_w_eq)
|
291
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
292
|
-
|
293
|
-
def initialize(args=nil)
|
294
|
-
super(@@arr_size.size)
|
295
|
-
if args
|
296
|
-
if args.is_a? Hash
|
297
|
-
args.each do |k,v|
|
298
|
-
self[@@ind[k]] = v
|
299
|
-
end
|
300
|
-
end
|
301
|
-
end
|
302
|
-
end
|
303
340
|
|
304
341
|
def inspect
|
305
342
|
peaks_st = 'nil'
|
@@ -310,7 +347,7 @@ class SRF::DTA < Array
|
|
310
347
|
def from_handle(fh)
|
311
348
|
st = fh.read(24)
|
312
349
|
# get the bulk of the data in single unpack
|
313
|
-
self[0,7] = st.unpack(
|
350
|
+
self[0,7] = st.unpack(Unpack)
|
314
351
|
|
315
352
|
# Scan numbers possibly hidden in this next sequence of bytes (I think)
|
316
353
|
st2 = fh.read(24)
|
@@ -323,112 +360,99 @@ class SRF::DTA < Array
|
|
323
360
|
|
324
361
|
end
|
325
362
|
|
363
|
+
SRF::OUT = ArrayClass.new( %w(num_hits computer date_time hits) )
|
364
|
+
# 0=num_hits 1=computer 2=date_time 3=hits
|
326
365
|
|
327
|
-
class SRF::OUT
|
328
|
-
|
329
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
330
|
-
ind_keys = {:num_hits => 0, :charge => 1, :computer => 2, :date_time => 3, :first_scan => 4, :last_scan => 5, :filename_noext => 6, :hits => 7}
|
331
|
-
@@arr_size = ind_keys.size
|
332
|
-
|
333
|
-
def num_hits ; self[0] end ; def num_hits=(oth) ; self[0] = oth end
|
334
|
-
def charge ; self[1] end ; def charge=(oth) ; self[1] = oth end
|
335
|
-
def computer ; self[2] end ; def computer=(oth) ; self[2] = oth end
|
336
|
-
def date_time ; self[3] end ; def date_time=(oth) ; self[3] = oth end
|
337
|
-
def first_scan ; self[4] end ; def first_scan=(oth) ; self[4] = oth end
|
338
|
-
def last_scan ; self[5] end ; def last_scan=(oth) ; self[5] = oth end
|
339
|
-
def filename_noext ; self[6] end ; def filename_noext=(oth) ; self[6] = oth end
|
340
|
-
def hits ; self[7] end ; def hits=(oth) ; self[7] = oth end
|
341
|
-
|
342
|
-
@@arr_size = ind_keys.size
|
343
|
-
ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
|
344
|
-
ind_keys.merge!(ind_keys_w_eq)
|
345
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
346
|
-
|
347
|
-
def initialize(args=nil)
|
348
|
-
super(@@arr_size.size)
|
349
|
-
if args
|
350
|
-
if args.is_a? Hash
|
351
|
-
args.each do |k,v|
|
352
|
-
self[@@ind[k]] = v
|
353
|
-
end
|
354
|
-
end
|
355
|
-
end
|
356
|
-
end
|
366
|
+
# One out file of an srf: a 96 byte header followed by num_hits peptide hits.
# Array layout: 0=num_hits 1=computer 2=date_time 3=hits
class SRF::OUT
  # unpack template for the 96 byte header: a 16 bit num_hits at offset 36,
  # two skipped bytes, then the computer string; date_time starts at offset 60
  Unpack = '@36vx2Z*@60Z*'

  def inspect
    # first_scan/last_scan/filename_noext are not among this class's fields
    # any more; only report them if something else still provides them
    # (calling them unconditionally would raise).
    if respond_to?(:first_scan) && first_scan
      ins = "@first_scan=#{first_scan}, @last_scan=#{last_scan}, @filename_noext=#{filename_noext}, "
    end
    "<SRF::OUT @num_hits=#{num_hits}, @computer=#{computer}, @date_time=#{date_time}, #{ins}@hits=#{hits.inspect}>"
  end

  # Reads one out file from fh: the header, then num_hits peptide hits (each
  # is given global_ref_hash).
  #
  # returns self
  def from_handle(fh, global_ref_hash)
    ## EMPTY out file is 96 bytes
    ## each hit is 320 bytes
    ## num_hits, computer and date_time:
    st = fh.read(96)
    self[0,3] = st.unpack(Unpack)
    num_hits = self[0]

    ar = Array.new(num_hits) { SRF::OUT::Pep.new.from_handle(fh, global_ref_hash) }
    unless ar.empty?
      ## The xcorrs are already ordered by best to worst hit
      ## ADJUST the deltacn's to be meaningful for the top hit:
      ## (the same as bioworks and prophet)
      ## i.e. each hit takes the deltacn of the hit after it ...
      ar.each_cons(2) {|better, worse| better.deltacn = worse.deltacn }
      ## ... and the last hit, having no successor, gets 1.1
      ar.last.deltacn = 1.1
    end
    self[3] = ar

    self
  end

end
|
383
401
|
|
384
|
-
|
402
|
+
# deltacn is modified to be that of the next best hit (by xcorr).
|
403
|
+
# if there is no next best hit, then it will be 1.1 (like bioworks)
|
404
|
+
# mh is the theoretical mass + h
|
405
|
+
# prots are created as SRF prot objects with a reference and linked to their
|
406
|
+
# peptides (from global hash by reference)
|
407
|
+
# ppm = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]
|
408
|
+
# This is calculated for the M+H mass!
|
409
|
+
# srf = the srf object this scan came from
|
410
|
+
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn sp xcorr id rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf) )
|
411
|
+
|
412
|
+
# 0=mh 1=deltacn 2=sp 3=xcorr 4=id 5=rsp 6=ions_matched 7=ions_total 8=sequence 9=prots 10=deltamass 11=ppm 12=aaseq 13=base_name 14=first_scan 15=last_scan 16=charge 17=srf
|
413
|
+
|
414
|
+
class SRF::OUT::Pep
|
415
|
+
include SpecID::Pep
|
416
|
+
|
417
|
+
Unpack = '@64Ex8ex12eeIx18vvvx8Z*@240Z*'
|
418
|
+
Unpack_four_null_bytes = 'a*'
|
419
|
+
Unpack_Zstar = 'Z*'
|
420
|
+
|
385
421
|
FourNullBytes_as_string = "\0\0\0\0"
|
386
422
|
#NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
|
387
423
|
NewRecordStart = 0x01.chr + 0x00.chr
|
388
424
|
Sequest_record_start = "[SEQUEST]"
|
389
425
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
@@arr_size = ind_keys.size
|
394
|
-
|
395
|
-
def mh ; self[0] end ; def mh=(oth) ; self[0] = oth end
|
396
|
-
def deltacn ; self[1] end ; def deltacn=(oth) ; self[1] = oth end
|
397
|
-
def sp ; self[2] end ; def sp=(oth) ; self[2] = oth end
|
398
|
-
def xcorr ; self[3] end ; def xcorr=(oth) ; self[3] = oth end
|
399
|
-
def id ; self[4] end ; def id=(oth) ; self[4] = oth end
|
400
|
-
def rsp ; self[5] end ; def rsp=(oth) ; self[5] = oth end
|
401
|
-
def ions_matched ; self[6] end ; def ions_matched=(oth) ; self[6] = oth end
|
402
|
-
def ions_total ; self[7] end ; def ions_total=(oth) ; self[7] = oth end
|
403
|
-
def peptide ; self[8] end ; def peptide=(oth) ; self[8] = oth end
|
404
|
-
def reference ; self[9] end ; def reference=(oth) ; self[9] = oth end
|
405
|
-
# The number of total proteins sharing this peptide
|
406
|
-
def num_tot_proteins ; self[10] end ; def num_tot_proteins=(oth) ; self[10] = oth end
|
407
|
-
|
408
|
-
def initialize(args=nil)
|
409
|
-
super(@@arr_size.size)
|
410
|
-
if args
|
411
|
-
if args.is_a? Hash
|
412
|
-
args.each do |k,v|
|
413
|
-
self[@@ind[k]] = v
|
414
|
-
end
|
415
|
-
end
|
416
|
-
end
|
417
|
-
end
|
426
|
+
tmp = $VERBOSE ; $VERBOSE = nil
|
427
|
+
def prots() self[9] end
|
428
|
+
$VERBOSE = tmp
|
418
429
|
|
419
430
|
# One line summary of the hit: "<Class @field=value ... >".
# Fields whose value is an Array (prots) are shown as "#field=<size>"
# instead of dumping their contents; the srf is shown by its base_name only,
# and only when one is linked.
def inspect
  st = %w(aaseq sequence mh deltacn sp xcorr id rsp ions_matched ions_total prots deltamass ppm base_name first_scan last_scan charge).map do |v|
    # test the field's VALUE: the name itself is always a String, so
    # checking the name never took the summary branch
    value = send(v.to_sym)
    if value.is_a? Array
      "##{v}=#{value.size}"
    else
      "@#{v}=#{value}"
    end
  end
  st.unshift("<#{self.class}")
  st.push("@srf(base_name)=#{srf.base_name}") if srf
  st.push('>')
  st.join(' ')
end
|
422
446
|
|
423
447
|
## There must be a better way to do this.
|
424
448
|
## We are checking that there are no additional protein references only
|
425
449
|
## so that we are in register for the next reading
|
426
|
-
def read_extra_references(fh)
|
450
|
+
def read_extra_references(fh, global_ref_hash)
|
427
451
|
$SRF_OUT_HIT_FH_POS = fh.pos
|
428
452
|
st = fh.read(4)
|
429
453
|
#puts "HHH: " + st.unpack("H*").first
|
430
454
|
## if we see 0000 0000 we are done
|
431
|
-
if st.unpack(
|
455
|
+
if st.unpack(Unpack_four_null_bytes).first == FourNullBytes_as_string
|
432
456
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
433
457
|
return nil
|
434
458
|
end
|
@@ -436,20 +460,11 @@ class SRF::OUT::Hit < Array
|
|
436
460
|
|
437
461
|
## NOTE: in context of 4 bytes read above!
|
438
462
|
st = fh.read(36)
|
439
|
-
#p self
|
440
|
-
#puts "HHHH: " + st.unpack("H*").first
|
441
|
-
#puts st[34,2].unpack("H*").first
|
442
463
|
if st[34,2] == NewRecordStart
|
443
464
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
444
465
|
return nil
|
445
466
|
end
|
446
467
|
|
447
|
-
##if st.unpack("@22H*").first == NewRecordStart_as_hex
|
448
|
-
#if st[22,6] == NewRecordStart
|
449
|
-
# fh.pos = $SRF_OUT_HIT_FH_POS
|
450
|
-
# return nil
|
451
|
-
#end
|
452
|
-
|
453
468
|
# is this the end of the outfiles?
|
454
469
|
## BACK to beginning of this section
|
455
470
|
fh.pos = $SRF_OUT_HIT_FH_POS
|
@@ -459,28 +474,67 @@ class SRF::OUT::Hit < Array
|
|
459
474
|
end
|
460
475
|
|
461
476
|
## we have extra references
|
462
|
-
|
463
|
-
fh.
|
464
|
-
|
465
|
-
|
477
|
+
## original read was fh.read(79)
|
478
|
+
fh.seek(-1, IO::SEEK_CUR)
|
479
|
+
self[9].push( new_protein(fh.read(80).unpack(Unpack_Zstar).first, self, global_ref_hash ) )
|
480
|
+
|
481
|
+
#p self.prots
|
482
|
+
#puts self.prots.size
|
466
483
|
#$glob ||= 0
|
467
484
|
#$glob += 1
|
468
|
-
#if $glob ==
|
485
|
+
#if $glob == 20
|
469
486
|
# abort
|
470
487
|
#end
|
471
488
|
|
472
|
-
read_extra_references(fh)
|
489
|
+
read_extra_references(fh,global_ref_hash)
|
473
490
|
end
|
474
491
|
|
492
|
+
# Looks up (or creates) the protein stored under +reference+ in
# global_ref_hash and links +peptide+ to it: an existing protein gets the
# peptide appended to its peps, an unknown reference gets a fresh
# SRF::OUT::Prot holding just this peptide.
#
# returns the protein object stored in the hash
def new_protein(reference, peptide, global_ref_hash)
  unless global_ref_hash.key?(reference)
    return global_ref_hash[reference] = SRF::OUT::Prot.new(reference, [peptide])
  end
  known_prot = global_ref_hash[reference]
  known_prot.peps << peptide
  known_prot
end
|
475
500
|
|
476
|
-
# Fills this hit from the 320 byte record at the current position of fh,
# then lets read_extra_references pick up any additional protein references
# that follow it.
#
# returns self
def from_handle(fh, global_ref_hash)
  ## read all the hit data in one go and unpack it into slots 0..9
  record = fh.read(320)
  self[0,10] = record.unpack(Unpack)

  # we are slicing the reference to 38 chars to be the same length as
  # duplicate references
  short_reference = self[9][0,38]
  first_prot = new_protein(short_reference, self, global_ref_hash)
  self[9] = [first_prot]

  # slot 12 (aaseq) is derived from slot 8 (sequence)
  self[12] = SpecID::Pep.sequence_to_aaseq(self[8])

  read_extra_references(fh, global_ref_hash)
  self
end
|
484
513
|
|
485
514
|
end
|
486
515
|
|
516
|
+
SRF::OUT::Prot = ArrayClass.new( %w(reference peps) )
|
517
|
+
|
518
|
+
# A protein reference together with the peptide hits that point at it.
# Array layout: 0=reference 1=peps
class SRF::OUT::Prot
  include SpecID::Prot

  # $VERBOSE is switched off around the definition of initialize and put
  # back afterwards (presumably to silence a redefinition warning -- confirm
  # against ArrayClass).
  saved_verbose = $VERBOSE
  $VERBOSE = nil
  # reference:: the protein reference string
  # peps::      the peptide hits belonging to this protein
  def initialize(reference=nil, peps=[])
    super(@@arr_size)
    self[0] = reference
    self[1] = peps
  end
  $VERBOSE = saved_verbose

  # Shows the reference and only the NUMBER of peptides.
  def inspect
    pep_count = peps.size
    "<SRF::OUT::Prot @reference=#{reference}, @peps(#)=#{pep_count}>"
  end
end
|
536
|
+
|
537
|
+
|
538
|
+
|
539
|
+
|
540
|
+
|