ms-sequest 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,707 @@
1
+
2
+ # standard lib
3
+ require 'set'
4
+ require 'fileutils'
5
+
6
+ # other gems
7
+ require 'arrayclass'
8
+
9
+ # in library
10
+ require 'ms/id/peptide'
11
+ require 'ms/id/protein'
12
+ require 'ms/id/search'
13
+ require 'ms/sequest/params'
14
+
15
+ # for conversions
16
+ require 'ms/sequest/srf/mgf'
17
+ require 'ms/sequest/srf/sqt'
18
+ require 'ms/sequest/srf/dta'
19
+
20
+ module Ms ; end
21
+ module Ms::Sequest ; end
22
+
23
+ class Ms::Sequest::Srf
24
+ include Ms::Id::Search
25
+
26
# NOTE: peps and prots are inherited from the included Search module.

# SRF format version as a String: '3.5', '3.3' or '3.2'
attr_accessor :version

# the header object produced by Header#from_io
attr_accessor :header
# one DTA record per spectrum
attr_accessor :dta_files
# one Out record per spectrum
attr_accessor :out_files
# the parsed sequest params (nil when the file carries none)
attr_accessor :params
# a parallel array to dta_files and out_files where each entry is:
#   [first_scan, last_scan, charge]
attr_accessor :index
# name of the raw file without directory or extension
attr_accessor :base_name

# a boolean to indicate if the results have been filtered by the
# sequest.params precursor mass tolerance
attr_accessor :filtered_by_precursor_mass_tolerance
43
+
44
# Returns the class used to represent protein hits for this kind of search.
def protein_class
  Ms::Sequest::Srf::Out::Prot
end
47
+
48
# Returns a Sequest::Params object parsed from the '[SEQUEST]' section of
# +filename+, or nil (after a warning) if no such section is found.
#
# Only the second half of the file is searched, on the expectation that the
# params section sits near the end.
def self.get_sequest_params(filename)
  # Binary mode matters: the index returned by rindex is used as a byte
  # offset for seek, which is only valid when the data is treated as raw
  # bytes rather than as (possibly multi-byte) characters.
  File.open(filename, 'rb') do |handle|
    halfway = handle.stat.size / 2
    handle.seek(halfway)
    last_half = handle.read
    marker_index = last_half.rindex('[SEQUEST]')
    if marker_index
      handle.seek(halfway + marker_index)
      Ms::Sequest::Params.new.parse_io(handle)
    else
      warn "#{filename} has no SEQUEST information, may be a truncated/corrupt file!"
      nil
    end
  end
end
66
+
67
# Byte offset at which the DTA records begin for the current @version.
# Returns nil for a version string that is not listed.
def dta_start_byte
  case @version
  when '3.2' then 3260
  when '3.3', '3.5' then 3644
  end
end
74
+
75
# Creates an empty Srf object and, when +filename+ is given, loads it with
# from_file(filename, opts).
#
# opts:
#     :filter_by_precursor_mass_tolerance => true | false (default true)
#       # requests filtering by the sequest params precursor tolerance, as
#       # is typically done by Bioworks.
#
#     :link_protein_hits => true | false (default true)
#       # if true, one protein is created per reference and linked to each
#       # relevant peptide hit (the result is stored in @prots).
#       # if false, each protein for each peptide hit is a unique object
#       # and @prots is left unset. If you are merging multiple searches
#       # then you probably want to set this to false to avoid
#       # recalculation.
def initialize(filename=nil, opts={})
  @peps = []
  @dta_files = []
  @out_files = []
  from_file(filename, opts) if filename
end
98
+
99
# Formats +float+ with exactly +decimal_places+ digits after the decimal
# point. Returns a String, not a Float.
def round(float, decimal_places)
  '%.*f' % [decimal_places, float]
end
102
+
103
# Removes every hit whose precursor mass error exceeds the tolerance given
# in params, marks the object as filtered and returns self.
#
# 1. updates each out_file's list of hits based on passing peptides (but not
#    the original hit id; rank is implicit in array ordering)
# 2. recalculates deltacn values completely if the number of hits changed
#    (does not touch deltacn_orig)
#
# This can spoil proper protein -> peptide linkages. Ms::Id::Search.merge!
# should be run after this method to ensure correct protein -> peptide
# linkages.
def filter_by_precursor_mass_tolerance!
  tolerance = params.peptide_mass_tolerance.to_f

  # peptide_mass_units: '0' => amu, '1' => milli-amu, anything else is
  # compared against the ppm value of the hit.
  out_of_tolerance =
    case params.peptide_mass_units
    when '0' then lambda { |pep| pep.deltamass.abs > tolerance }
    when '1' then lambda { |pep| pep.deltamass.abs > (tolerance / 1000) }
    else lambda { |pep| pep.ppm.abs > tolerance }
    end

  self.filtered_by_precursor_mass_tolerance = true
  out_files.each do |out_file|
    hits = out_file.hits
    before = hits.size
    # reject! edits the array held by out_file in place
    hits.reject! { |pep| out_of_tolerance.call(pep) }
    next if hits.size == before

    Ms::Sequest::Srf::Out::Pep.update_deltacns_from_xcorr(hits)
    out_file.num_hits = hits.size
  end
  self
end
149
+
150
# Loads the .srf file +filename+ into this object and returns self.
# opts are the same as for 'new'.
#
# Steps: read header, DTA records and Out records; read the trailing params
# and scan index (absent on an abortive run); copy scan information onto
# every hit; optionally filter by precursor tolerance and link proteins.
def from_file(filename, opts)
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true }.merge(opts)

  # Look-ahead at the params section: used for the duplicate-reference
  # warning and as a sanity check before filtering.
  params = Ms::Sequest::Srf.get_sequest_params(filename)
  if params && params.print_duplicate_references == '0'
    warn <<END
***************************************************************************
For complete protein <=> peptide linkages, .srf files must be created with
print_duplicate_references > 0.  To capture all duplicate references, set the
sequest parameter 'print_duplicate_references' to 100 or greater.
***************************************************************************
END
  end

  File.open(filename, "rb") do |fh|
    @header = Ms::Sequest::Srf::Header.new.from_io(fh)
    @version = @header.version

    # only version 3.5 uses the wider record layouts
    unpack_35 = (@version == '3.5')

    @dta_files, measured_mhs = read_dta_files(fh, @header.num_dta_files, unpack_35)
    @out_files = read_out_files(fh, @header.num_dta_files, measured_mhs, unpack_35)

    if fh.eof?
      # abortive run: no params and no scan index were written
      @params = nil
      @index = []
    else
      @params = Ms::Sequest::Params.new.parse_io(fh)
      # This is very sensitive to where parse_io leaves the file position
      fh.read(12)  ## gap between last params entry and index
      @index = read_scan_index(fh, @header.num_dta_files)
    end
  end

  ### UPDATE SOME THINGS:
  # Take the file name between the last path separator and the .RAW
  # extension; fall back to the last path component instead of crashing
  # when the name does not have that shape.
  raw_name = @header.raw_filename
  raw_match = raw_name.match(/(?:\A|[\\\/])([^\\\/]+)\.RAW$/i)
  @base_name = raw_match ? raw_match[1] : raw_name.split(/[\\\/]/).last

  # give each out file and each hit its base_name, first_scan, last_scan, charge
  @index.each_with_index do |scan_info, i|
    mass_measured = @dta_files[i][0]
    out_file = @out_files[i]
    out_file[0,3] = scan_info
    pep_hits = out_file[6]
    @peps.concat(pep_hits)
    pep_hits.each do |pep_hit|
      pep_hit[14,4] = [@base_name, *scan_info]
      pep_hit[11] = pep_hit[0] - mass_measured               # real - measured (deltamass)
      pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured  # ppm
      pep_hit[18] = self                                     # link with the srf object
    end
  end

  # The filter reads @params, so both the look-ahead and the in-place parse
  # must have succeeded; the caller can also opt out.
  if params && @params && opts[:filter_by_precursor_mass_tolerance]
    filter_by_precursor_mass_tolerance!
  end

  if opts[:link_protein_hits]
    (@peps, @prots) = merge!([peps]) do |_prot, _peps|
      Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
    end
  end

  self
end
228
+
229
# Reads +num+ index records from +fh+ and returns an Array where each entry
# is [first_scan, last_scan, charge].
#
# Each record occupies 24 bytes; the first 12 hold three 32-bit unsigned
# little-endian integers and the remainder is skipped. A record that cannot
# be read (end of file) yields [nil, nil, nil].
def read_scan_index(fh, num)
  record_bytes = 24
  Array.new(num) { fh.read(record_bytes).to_s.unpack('VVV') }
end
242
+
243
# Reads +num_files+ DTA records from +fh+, first moving to dta_start_byte if
# the handle is not already there.
#
# Returns [dta_files, measured_mhs] where measured_mhs is a parallel array
# holding element 0 (the measured M+H) of each DTA record.
def read_dta_files(fh, num_files, unpack_35)
  start = dta_start_byte
  fh.pos = start unless fh.pos == start

  dta_files = Array.new(num_files) { Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35) }
  measured_mhs = dta_files.map { |dta_file| dta_file[0] }
  [dta_files, measured_mhs]
end
259
+
260
# Reads +number_files+ Out records from +fh+ and returns them as an Array.
#
# The filehandle must already be at the start of the out files;
# read_dta_files leaves it there. +measured_mhs+ is accepted for interface
# compatibility and is not used here.
def read_out_files(fh, number_files, measured_mhs, unpack_35)
  Array.new(number_files) { Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35) }
end
269
+
270
+ end
271
+
272
+ class Ms::Sequest::Srf::Header
273
+
274
# Byte offset of each null-padded header field (version 3.3 / 3.5 layout).
# Every offset equals the previous offset plus the previous Byte_length.
Start_byte = {
  :enzyme => 438,
  :ion_series => 694,
  :model => 950,
  :modifications => 982,
  :raw_filename => 1822,
  :db_filename => 2082,
  :dta_log_filename => 2602,
  :params_filename => 3122,
  :sequest_log_filename => 3382,
}

# Width in bytes of each header field.
Byte_length = {
  :enzyme => 256,
  :ion_series => 256,
  :model => 32,
  :modifications => 840,
  :raw_filename => 260,
  :db_filename => 520,
  :dta_log_filename => 520,
  :params_filename => 260,
  # 262 (not 260) makes the header end at byte 3382 + 262 = 3644
  :sequest_log_filename => 262,
}

# Field widths that differ in version 3.2 (merged over Byte_length).
# The 840 - 456 = 384 byte difference moves the end of the header from
# 3644 to 3260.
Byte_length_v32 = {
  :modifications => 456,
}
299
+
300
# String of the form '3.N', built from the first integer in the file
attr_accessor :version
# a Ms::Sequest::Srf::DTAGen object
attr_accessor :dta_gen

# null-padded string fields read from the header
attr_accessor :enzyme
attr_accessor :ion_series
attr_accessor :model
attr_accessor :modifications
attr_accessor :raw_filename
attr_accessor :db_filename
attr_accessor :dta_log_filename
attr_accessor :params_filename
attr_accessor :sequest_log_filename
312
+
313
# Number of DTA records in the file, as reported by the DTAGen section.
def num_dta_files
  @dta_gen.num_dta_files
end
316
+
317
# Rewinds +fh+ to byte 0 and reads the header: the version number, the
# DTAGen section and the null-padded string fields. Returns self.
def from_io(fh)
  # the version integer is the first four bytes of the file
  fh.pos = 0 unless fh.pos == 0
  st = fh.read(4)
  @version = '3.' + st.unpack('V').first.to_s
  @dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)

  ## get the rest of the info
  byte_length = Byte_length.dup
  byte_length.merge!(Byte_length_v32) if @version == '3.2'

  # The fields are contiguous, so only the first offset is needed; reading
  # them in order also copes with the narrower 3.2 modifications field.
  fh.pos = Start_byte[:enzyme]
  [:enzyme, :ion_series, :model, :modifications, :raw_filename,
   :db_filename, :dta_log_filename, :params_filename,
   :sequest_log_filename].each do |field|
    send("#{field}=", get_null_padded_string(fh, byte_length[field]))
  end
  self
end
333
+
334
+ private
335
# Reads +bytes+ bytes from +fh+ and returns them with trailing padding
# (nulls / whitespace) removed. A field whose first byte is null is an empty
# declaration and yields ''.
def get_null_padded_string(fh, bytes)
  st = fh.read(bytes)
  # st[0, 1] is a one-character String on every Ruby version, whereas
  # st[0] is an Integer only on 1.8.
  return '' if st[0, 1] == "\0"
  st.rstrip!
  st
end
344
+
345
+
346
+ end
347
+
348
+ # the DTA Generation Params
349
+ class Ms::Sequest::Srf::DTAGen
350
+
351
# Float (not sure if this is correct)
attr_accessor :start_time
# Float
attr_accessor :start_mass
# Float
attr_accessor :end_mass
# Integer
attr_accessor :num_dta_files
# Integer
attr_accessor :group_scan
# Integer (not sure if this is correct)
attr_accessor :min_group_count
# Integer
attr_accessor :min_ion_threshold
#attr_accessor :intensity_threshold # can't find yet
#attr_accessor :precursor_tolerance # can't find yet
# Integer
attr_accessor :start_scan
# Integer
attr_accessor :end_scan
373
+
374
# Rewinds +fh+ to byte 0, reads the first 148 bytes and fills in the DTA
# generation parameters. Returns self.
#
# Layout: three little-endian single floats followed by six 32-bit
# little-endian unsigned integers, separated by skipped bytes.
def from_io(fh)
  fh.pos = 0 unless fh.pos == 0
  st = fh.read(148)
  (@start_time, @start_mass, @end_mass,
   @num_dta_files, @group_scan, @min_group_count,
   @min_ion_threshold, @start_scan, @end_scan) = st.unpack('x36ex12ex4ex48Vx12VVVVV')
  self
end
381
+ end
382
+
383
# Array-backed record for one DTA entry.
#   total_num_possible_charge_states is not correct under 3.5 (Bioworks 3.3.1)
#   unknown is, well, unknown...
Ms::Sequest::Srf::DTA = Arrayclass.new( %w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks) )
387
+
388
+ class Ms::Sequest::Srf::DTA
389
# Unpack formats for the fixed-size DTA record header (fields 0..6).
# The 3.2/3.3 layout is 24 bytes, the 3.5 layout is 34 bytes.
Unpack_32 = "EeVvvvv"
Unpack_35 = "Ex8eVx2vvvv"

# note on peaks (self[7])
# this is a byte array of floats, you can get the peaks out with
# unpack("e*")
397
+
398
# Compact inspect: the raw peaks string is summarized by its byte count
# instead of being dumped.
undef_method :inspect
def inspect
  peaks_st = self[7] ? "[#{self[7].size} bytes]" : 'nil'
  "<Ms::Sequest::Srf::DTA @mh=#{mh} @dta_tic=#{dta_tic} @num_peaks=#{num_peaks} @charge=#{charge} @ms_level=#{ms_level} @total_num_possible_charge_states=#{total_num_possible_charge_states} @peaks=#{peaks_st} >"
end
404
+
405
# Reads one DTA record from +fh+ at its current position and returns self.
#
# unpack_35 selects the 3.5 layout (34-byte header, 22-byte spacer);
# otherwise the 24-byte header / 24-byte spacer layout is used. The peak
# data (num_peaks * 8 bytes) is stored unparsed in self[7].
def from_io(fh, unpack_35)
  if unpack_35
    unpack_format, header_bytes, spacer_bytes = Unpack_35, 34, 22
  else
    unpack_format, header_bytes, spacer_bytes = Unpack_32, 24, 24
  end

  # get the bulk of the data in a single unpack
  self[0,7] = fh.read(header_bytes).unpack(unpack_format)

  # skipped: scan numbers are given at the end of the file in an index
  fh.read(spacer_bytes)

  self[7] = fh.read(num_peaks * 8)
  self
end
428
+
429
# Returns the record as the text of a .dta file: a first line with the M+H
# (6 decimals) and the charge, then one line per peak pair, each line ending
# in "\r\n".
def to_dta_file_data
  # Formatting is done inline so this does not depend on a round helper
  # being available on the record class.
  string = sprintf("%.6f %s\r\n", mh, charge)
  # presumably (m/z, intensity) pairs stored as consecutive floats
  peaks.unpack('e*').each_slice(2) do |mz, intensity|
    # %d is equivalent to floor, so we round by adding 0.5!
    string << sprintf("%.4f %d\r\n", mz, (intensity + 0.5).floor)
  end
  string
end
439
+
440
# Writes the .dta text for this record (see to_dta_file_data) to +io+.
def write_dta_file(io)
  io.print(to_dta_file_data)
end
444
+
445
+ end
446
+
447
+
448
# Array-backed record for the search results of one spectrum.
# 0=first_scan, 1=last_scan, 2=charge, 3=num_hits, 4=computer, 5=date_time, 6=hits, 7=total_inten, 8=lowest_sp, 9=num_matched_peptides, 10=db_locus_count
Ms::Sequest::Srf::Out = Arrayclass.new( %w(first_scan last_scan charge num_hits computer date_time hits total_inten lowest_sp num_matched_peptides db_locus_count) )

class Ms::Sequest::Srf::Out
  # num_hits, computer and date_time (fields 3..5) within the 96-byte header
  Unpack_32 = '@36vx2Z*@60Z*'
  Unpack_35 = '@36vx4Z*@62Z*'

  # Compact inspect: hits are summarized by their count.
  undef_method :inspect
  def inspect
    hits_s = self[6] ? ", @hits(#)=#{hits.size}" : ''
    "<Ms::Sequest::Srf::Out first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
  end

  # Reads one out file from +fh+ at its current position and returns self.
  #
  # A fixed 96-byte header is followed by num_hits peptide hit records and
  # then by the extra protein references of those hits. Fields 0..2 (the
  # scan information) are not set here.
  def from_io(fh, unpack_35)
    st = fh.read(96)

    self[3,3] = st.unpack(unpack_35 ? Unpack_35 : Unpack_32)
    # total_inten, lowest_sp, num_matched_peptides, db_locus_count
    self[7,4] = st.unpack('@8eex4Vx4V')

    pep_hits = Array.new(self[3]) { Ms::Sequest::Srf::Out::Pep.new.from_io(fh, unpack_35) }
    unless pep_hits.empty?
      num_extra_references = pep_hits.inject(0) { |sum, pep| sum + pep.num_other_loci }
      Ms::Sequest::Srf::Out::Pep.read_extra_references(fh, num_extra_references, pep_hits)
      ## The xcorrs are already ordered by best to worst hit
      ## ADJUST the deltacn's to be meaningful for the top hit:
      ## (the same as bioworks and prophet)
      Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(pep_hits)
    end
    self[6] = pep_hits
    self
  end
end
498
+
499
+
500
# Array-backed record for one peptide hit.
#
# deltacn_orig - the value that sequest originally reports (top hit gets 0.0)
# deltacn - modified to be that of the next best hit (by xcorr); the last
#   hit takes 1.1. This is what is called deltacn by bioworks and pepprophet
#   (at least for the first few years). If filtering occurs, it is updated.
# deltacn_orig_updated - the latest updated value of deltacn_orig.
#   Originally this is equal to deltacn_orig. After filtering it is
#   recalculated. To know whether it may differ from deltacn_orig, query
#   srf.filtered_by_precursor_mass_tolerance.
# mh - the theoretical mass + h
# prots - SRF prot objects with a reference, linked to their peptides
# deltamass - mass_real - mass_measured
# ppm - 10^6 * |deltamass| / mass_measured, calculated for the M+H mass
# num_other_loci - the number of other loci that the peptide matches beyond
#   the first one listed
# srf - the srf object this scan came from
#
# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
523
+
524
+ class Ms::Sequest::Srf::Out::Pep
525
+ #include SpecID::Pep
526
+
527
# Sets each hit's deltacn to the deltacn_orig of the next hit in +ar+ (so
# the value is meaningful for the top hit); the last hit takes 1.1.
# Assumes +ar+ is sorted best to worst. Does nothing for an empty array.
def self.set_deltacn_from_deltacn_orig(ar)
  return if ar.empty?
  ar.each_cons(2) { |hit, next_hit| hit.deltacn = next_hit.deltacn_orig }
  ar.last.deltacn = 1.1
end
534
+
535
# Recalculates deltacn (index 19) and deltacn_orig_updated (index 20) from
# the xcorr values (index 3) of the hits in +ar+.
# Assumes +ar+ is sorted best to worst. Does nothing for an empty array.
#
# For each hit after the first, delta = 1.0 - xcorr / top_xcorr. That delta
# becomes the hit's own deltacn_orig_updated and the deltacn of the hit
# ranked just above it. The top hit gets deltacn_orig_updated 0.0 and the
# last hit gets deltacn 1.1.
def self.update_deltacns_from_xcorr(ar)
  return if ar.empty?

  top_score = ar.first[3]
  ar.first[20] = 0.0
  ar.each_cons(2) do |better, worse|
    delta = 1.0 - (worse[3] / top_score)
    better[19] = delta
    worse[20] = delta
  end
  ar.last[19] = 1.1
end
551
+
552
# Reads +num_extra_references+ extra protein references from +fh+ and
# appends each one (as a Prot) to the prots list (index 10) of the hit it
# belongs to.
#
# Each entry is an 8-byte prefix whose second 32-bit little-endian integer
# is the 1-based position of the hit in +pep_hits+, followed by 80 bytes of
# padded reference text. The reference is cut to 38 characters.
def self.read_extra_references(fh, num_extra_references, pep_hits)
  num_extra_references.times do
    hit_number = fh.read(8).unpack('x4V').first
    pep = pep_hits[hit_number - 1]

    ref = fh.read(80).unpack('A*').first
    pep[10] << Ms::Sequest::Srf::Out::Prot.new(ref[0,38])
  end
end
562
+
563
# Unpack formats for one peptide hit record. Both yield 11 values:
#   mh, deltacn_orig, sp, xcorr, id, num_other_loci, rsp, ions_matched,
#   ions_total, sequence, reference
# translation: @64=(64 bytes in to the record), E=mH, x8=8 unknown bytes,
# e=deltacn, x12=12 unknown bytes, e=sp, e=xcorr, V=ID#, x18 (x14 in the
# 3.2 layout)=unknown bytes, V=num_other_loci, v=rsp, v=ions_matched,
# v=ions_total, x8=8 unknown bytes, Z*=sequence, @246 (@240 in the 3.2
# layout) Z*=the protein reference string.
### NOTE:
# still needs verification that the second 'V' (after x18 / x14) is correct
# earlier attempts:
#Unpack_35 = '@64Ex8ex12eeIx22vx2vvx8Z*@246Z*'   (x2=???)
#Unpack_32 = '@64Ex8ex12eeIx18vvvx8Z*@240Z*'
Unpack_35 = '@64Ex8ex12eeVx18Vvx2vvx8Z*@246Z*'
Unpack_32 = '@64Ex8ex12eeVx14Vvvvx8Z*@240Z*'
Unpack_four_null_bytes = 'a*'
Unpack_Zstar = 'Z*'
# bytes read per hit record
Read_35 = 426
Read_32 = 320

FourNullBytes_as_string = "\0\0\0\0"
#NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
NewRecordStart = 0x01.chr + 0x00.chr
Sequest_record_start = "[SEQUEST]"
583
+
584
# Compact inspect: prots are summarized by their count and the linked srf
# object (when present) by its base_name, instead of being dumped.
undef_method :inspect
def inspect
  fields = %w(aaseq sequence mh deltacn_orig sp xcorr id rsp ions_matched ions_total prots deltamass ppm base_name first_scan last_scan charge deltacn)
  parts = fields.map do |name|
    if name == 'prots'
      "#{name}(#)=#{send(name).size}"
    else
      "#{name}=#{send(name).inspect}"
    end
  end
  parts.unshift("<#{self.class}")
  parts.push("srf(base_name)=#{srf.base_name.inspect}") if srf
  parts.push('>')
  parts.join(' ')
end
603
# Reads one peptide hit record from +fh+ at its current position and
# returns self.
#
# unpack_35 selects the 3.5 layout (Read_35 bytes plus a 6-byte trailer);
# otherwise Read_32 bytes are read.
def from_io(fh, unpack_35)
  if unpack_35
    unpack_format, record_bytes = Unpack_35, Read_35
  else
    unpack_format, record_bytes = Unpack_32, Read_32
  end

  ## read all the hit data
  st = fh.read(record_bytes)

  # The unpack yields 11 values (fields 0..10), so an 11-wide slice is
  # assigned; a 10-wide slice would insert the 11th value instead of
  # overwriting slot 10.
  self[0,11] = st.unpack(unpack_format)

  # set deltacn_orig_updated
  self[20] = self[1]

  # we are slicing the reference to 38 chars to be the same length as
  # duplicate references
  self[10] = [Ms::Sequest::Srf::Out::Prot.new(self[10][0,38])]

  self[13] = Ms::Id::Peptide.sequence_to_aaseq(self[9])

  fh.read(6) if unpack_35

  self
end
629
+
630
+ end
631
+
632
+
633
# Array-backed protein record: 0=reference, 1=peps
Ms::Sequest::Srf::Out::Prot = Arrayclass.new( %w(reference peps) )

class Ms::Sequest::Srf::Out::Prot
  include Ms::Id::Protein

  # initialize is redefined here; warnings are switched off around the
  # definition and restored afterwards, even if the definition fails.
  verbosity = $VERBOSE
  begin
    $VERBOSE = nil
    # reference: the protein reference string
    # peps: the peptide hits linked to this protein
    def initialize(reference=nil, peps=[])
      super(self.class.size)
      self[0,2] = reference, peps
    end
  ensure
    $VERBOSE = verbosity
  end

  # Compact inspect: peps are summarized by their count.
  undef_method :inspect
  def inspect
    "<Ms::Sequest::Srf::Out::Prot @reference=#{reference}, @peps(#)=#{peps.size}>"
  end
end
658
+
659
class Ms::Sequest::SrfGroup
  include Ms::Id::SearchGroup

  # inherits an array of Ms::Sequest::Srf::Out::Pep objects
  # inherits an array of Ms::Sequest::Srf::Out::Prot objects

  # see Ms::Id::Search for acceptable arguments
  # (filename, filenames, array of objects)
  # opts =
  #     :filter_by_precursor_mass_tolerance => true | false (default true)
  #     :link_protein_hits => false skips the group-level merge
  #
  # Each individual search is always created with :link_protein_hits =>
  # false; unless the caller passed false, the merge is then done once over
  # the peps of all searches. The optional block is called with self.
  def initialize(arg, opts={}, &block)
    link_hits = (opts[:link_protein_hits] != false)
    super(arg, opts.merge(:link_protein_hits => false)) do
      if link_hits
        puts "MERGING GROUP!"
        (@peps, @prots) = merge!(@searches.map {|v| v.peps }) do |_prot, _peps|
          Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
        end
      end
    end
    block.call(self) if block_given?
  end

  # the class used to read each member of the group
  def search_class
    Ms::Sequest::Srf
  end

  # Writes one srf filename per line to +srg_filename+ and returns
  # +srg_filename+. A name that exists on disk is expanded to its full
  # path, otherwise it is written as given.
  def to_srg(srg_filename='bioworks.srg')
    File.open(srg_filename, 'w') do |out|
      @filenames.each do |srf_file|
        out.puts(File.exist?(srf_file) ? File.expand_path(srf_file) : srf_file)
      end
    end
    srg_filename
  end
end
703
+
704
+
705
+
706
+
707
+