ms-sequest 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,707 @@
1
+
2
+ # standard lib
3
+ require 'set'
4
+ require 'fileutils'
5
+
6
+ # other gems
7
+ require 'arrayclass'
8
+
9
+ # in library
10
+ require 'ms/id/peptide'
11
+ require 'ms/id/protein'
12
+ require 'ms/id/search'
13
+ require 'ms/sequest/params'
14
+
15
+ # for conversions
16
+ require 'ms/sequest/srf/mgf'
17
+ require 'ms/sequest/srf/sqt'
18
+ require 'ms/sequest/srf/dta'
19
+
20
# Namespace declarations: make sure Ms and Ms::Sequest exist before the
# classes below are opened with their fully qualified names.
module Ms
  module Sequest
  end
end
22
+
23
# Reader for a SEQUEST .srf results file.
#
# Parsing fills in the header, the per-spectrum dta and out records, the
# embedded sequest params and the scan index, and then annotates every
# peptide hit with its base name, scan range, charge and mass error.
class Ms::Sequest::Srf
  include Ms::Id::Search

  # inherits peps and prots from Search

  # a String: 3.5, 3.3 or 3.2
  attr_accessor :version

  attr_accessor :header
  attr_accessor :dta_files
  attr_accessor :out_files
  attr_accessor :params
  # a parallel array to dta_files and out_files where each entry is:
  # [first_scan, last_scan, charge]
  attr_accessor :index
  attr_accessor :base_name

  # a boolean to indicate if the results have been filtered by the
  # sequest.params precursor mass tolerance
  attr_accessor :filtered_by_precursor_mass_tolerance

  # the class used for protein objects created by this search
  def protein_class
    Ms::Sequest::Srf::Out::Prot
  end

  # returns a Sequest::Params object or nil if none
  #
  # Only the second half of the file is scanned for the last '[SEQUEST]'
  # marker.  The file is opened in binary mode so that the offset returned by
  # rindex is a byte offset that can be handed straight to seek (in text mode
  # the string may be treated as multi-byte text and rindex would then count
  # characters, not bytes).
  def self.get_sequest_params(filename)
    File.open(filename, 'rb') do |handle|
      halfway = handle.stat.size / 2
      handle.seek halfway
      last_half = handle.read
      sequest_start_index = last_half.rindex('[SEQUEST]')
      if sequest_start_index
        handle.seek(sequest_start_index + halfway)
        Ms::Sequest::Params.new.parse_io(handle)
      else
        warn "#{filename} has no SEQUEST information, may be a truncated/corrupt file!"
        nil
      end
    end
  end

  # byte offset at which the dta records start for the current @version
  # (nil when the version is not one of 3.2, 3.3 or 3.5)
  def dta_start_byte
    case @version
    when '3.2' then 3260
    when '3.3', '3.5' then 3644
    end
  end

  # opts:
  #     :filter_by_precursor_mass_tolerance => true | false (default true)
  #         # this will filter by the sequest params prec tolerance as is
  #         # typically done by Bioworks.
  #
  #     :link_protein_hits => true | false (default true)
  #         # if true, generates the @prot attribute for the :prot method
  #         # and creates one protein per reference that is linked to each
  #         # relevant peptide hit.
  #         # if false, each protein for each peptide hit is a unique object
  #         # and the :prots method returns nil.  If you are merging multiple
  #         # searches then you probably want to set this to false to avoid
  #         # recalculation.
  #
  def initialize(filename=nil, opts={})
    @peps = []

    @dta_files = []
    @out_files = []
    from_file(filename, opts) if filename
  end

  # formats float with a fixed number of decimal places; returns a String
  def round(float, decimal_places)
    sprintf("%.#{decimal_places}f", float)
  end

  # 1. updates the out_file's list of hits based on passing peptides (but not
  # the original hit id; rank is implicit in array ordering)
  # 2. recalculates deltacn values completely if number of hits changed (does
  # not touch deltacn orig)
  #
  # This can spoil proper protein -> peptide linkages.  Ms::Id::Search.merge!
  # should be run after this method to ensure correct protein -> peptide
  # linkages.
  #
  # returns self
  def filter_by_precursor_mass_tolerance!
    pmt = params.peptide_mass_tolerance.to_f

    # peptide_mass_units '0' compares deltamass against the tolerance, '1'
    # against tolerance/1000; any other value (i.e. '2') compares ppm.
    outside_tolerance =
      case params.peptide_mass_units
      when '0' then lambda { |pep| pep.deltamass.abs > pmt }
      when '1' then lambda { |pep| pep.deltamass.abs > (pmt/1000) }
      else          lambda { |pep| pep.ppm.abs > pmt }
      end

    self.filtered_by_precursor_mass_tolerance = true
    out_files.each do |out_file|
      hits = out_file.hits
      before = hits.size
      # reject! edits the out file's own hits array in place
      hits.reject! { |pep| outside_tolerance.call(pep) }
      next if hits.size == before

      Ms::Sequest::Srf::Out::Pep.update_deltacns_from_xcorr(hits)
      out_file.num_hits = hits.size
    end
    self
  end

  # returns self
  # opts are the same as for 'new'
  def from_file(filename, opts={})
    opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)

    # quick look at the trailing params so the user can be warned up front
    trailing_params = Ms::Sequest::Srf.get_sequest_params(filename)
    if trailing_params && trailing_params.print_duplicate_references == '0'
      warn <<END
***************************************************************************
For complete protein <=> peptide linkages, .srf files must be created with
print_duplicate_references > 0.  To capture all duplicate references, set the
sequest parameter 'print_duplicate_references' to 100 or greater.
***************************************************************************
END
    end

    File.open(filename, "rb") do |fh|
      @header = Ms::Sequest::Srf::Header.new.from_io(fh)
      @version = @header.version

      # only version 3.5 files use the longer record layouts
      unpack_35 = (@version == '3.5')

      @dta_files, measured_mhs = read_dta_files(fh, @header.num_dta_files, unpack_35)
      @out_files = read_out_files(fh, @header.num_dta_files, measured_mhs, unpack_35)

      if fh.eof?
        # nothing follows the out records: no params and no scan index
        @params = nil
        @index = []
      else
        @params = Ms::Sequest::Params.new.parse_io(fh)
        # This is very sensitive to the grab_params method in sequest params
        fh.read(12)  ## gap between last params entry and index
        @index = read_scan_index(fh, @header.num_dta_files)
      end
    end

    ### UPDATE SOME THINGS:
    # file name without directory or .RAW extension; tolerant of a missing
    # directory part and of a lower-case extension
    raw_name = @header.raw_filename.to_s.split(/[\\\/]/).last.to_s
    @base_name = raw_name.sub(/\.raw\z/i, '')

    # give each hit a base_name, first_scan, last_scan
    @index.each_with_index do |ind, i|
      mass_measured = @dta_files[i][0]
      out_file = @out_files[i]
      out_file[0,3] = ind            # first_scan, last_scan, charge
      pep_hits = out_file[6]
      @peps.concat(pep_hits)
      pep_hits.each do |pep_hit|
        pep_hit[14,4] = @base_name, *ind
        # add the deltamass
        pep_hit[11] = pep_hit[0] - mass_measured  # real - measured (deltamass)
        pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured  ## ppm
        pep_hit[18] = self  ## link with the srf object
      end
    end

    # NOTE(review): @peps was filled above, before filtering, and filtering
    # only edits each out file's hits array -- confirm whether @peps should
    # be rebuilt from the surviving hits.
    if opts[:filter_by_precursor_mass_tolerance] && trailing_params && @params
      filter_by_precursor_mass_tolerance!
    end

    if opts[:link_protein_hits]
      (@peps, @prots) = merge!([peps]) do |_prot, _peps|
        Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
      end
    end

    self
  end

  # returns an index where each entry is [first_scan, last_scan, charge]
  #
  # Reads num records of 24 bytes each; the three values are the first three
  # 32-bit little-endian integers of every record.
  def read_scan_index(fh, num)
    ind_len = 24
    st = '0' * ind_len  ## a 24 byte string reused as the read buffer
    Array.new(num) do
      fh.read(ind_len, st)
      st.unpack('VVV')
    end
  end

  # returns [dta_files, measured_mhs], two parallel arrays of num_files
  # entries: the DTA records and the first field (mh) of each record
  def read_dta_files(fh, num_files, unpack_35)
    start = dta_start_byte
    fh.pos = start unless fh.pos == start

    dta_files = Array.new(num_files) do
      Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
    end
    measured_mhs = dta_files.map { |dta_file| dta_file[0] }
    [dta_files, measured_mhs]
  end

  # filehandle (fh) must be at the start of the outfiles.  'read_dta_files'
  # will put the fh there.
  # (measured_mhs is accepted but not used)
  def read_out_files(fh, number_files, measured_mhs, unpack_35)
    Array.new(number_files) do
      Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35)
    end
  end

end
271
+
272
# The fixed-position header at the start of an .srf file: the format version,
# the dta generation parameters and a series of null-padded text fields.
class Ms::Sequest::Srf::Header

  # byte offset of each text field (only :enzyme is used to position the
  # handle; the remaining fields are read back to back after it)
  Start_byte = {
    :enzyme => 438,
    :ion_series => 694,
    :model => 950,
    :modifications => 982,
    :raw_filename => 1822,
    :db_filename => 2082,
    :dta_log_filename => 2602,
    :params_filename => 3122,
    :sequest_log_filename => 3382,
  }
  # number of bytes occupied by each text field
  Byte_length = {
    :enzyme => 256,
    :ion_series => 256,
    :model => 32,
    :modifications => 840,
    :raw_filename => 260,
    :db_filename => 520,
    :dta_log_filename => 520,
    :params_filename => 260,
    :sequest_log_filename => 262, ## is this really 262?? or should be 260??
  }
  # field lengths that differ in version 3.2 files
  Byte_length_v32 = {
    :modifications => 456,
  }

  # a String such as '3.5' ('3.' followed by the integer stored in the file)
  attr_accessor :version
  # a Ms::Sequest::Srf::DTAGen object
  attr_accessor :dta_gen
  attr_accessor :enzyme
  attr_accessor :ion_series
  attr_accessor :model
  attr_accessor :modifications
  attr_accessor :raw_filename
  attr_accessor :db_filename
  attr_accessor :dta_log_filename
  attr_accessor :params_filename
  attr_accessor :sequest_log_filename

  # the number of dta files, taken from the dta generation parameters
  def num_dta_files
    @dta_gen.num_dta_files
  end

  # Reads the header from fh and returns self.
  #
  # The version is read from the handle's current position (callers pass a
  # handle positioned at the start of the file); DTAGen#from_io rewinds the
  # handle itself and the text fields are read from Start_byte[:enzyme] on.
  def from_io(fh)
    st = fh.read(4)
    # the version number is a 32-bit little-endian integer
    @version = '3.' + st.unpack('V').first.to_s
    @dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)

    ## get the rest of the info
    byte_length = Byte_length.dup
    byte_length.merge! Byte_length_v32 if @version == '3.2'

    fh.pos = Start_byte[:enzyme]
    [:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
      send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]) )
    end
    self
  end

  private

  # reads bytes bytes from fh and returns them with the trailing padding
  # stripped; a field whose first byte is null is an empty declaration
  def get_null_padded_string(fh, bytes)
    st = fh.read(bytes)
    # compare the byte value: String#[] returns a one-character String on
    # ruby >= 1.9, which never equals an Integer
    return '' if st.getbyte(0) == 0
    st.rstrip!
    st
  end

end
347
+
348
+ # the DTA Generation Params
349
# Values unpacked from the first 148 bytes of an .srf file.
class Ms::Sequest::Srf::DTAGen

  ## not sure if this is correct
  # Float
  attr_accessor :start_time
  # Float
  attr_accessor :start_mass
  # Float
  attr_accessor :end_mass
  # Integer
  attr_accessor :num_dta_files
  # Integer
  attr_accessor :group_scan
  ## not sure if this is correct
  # Integer
  attr_accessor :min_group_count
  # Integer
  attr_accessor :min_ion_threshold
  #attr_accessor :intensity_threshold # can't find yet
  #attr_accessor :precursor_tolerance # can't find yet
  # Integer
  attr_accessor :start_scan
  # Integer
  attr_accessor :end_scan

  # Rewinds fh to byte 0 if necessary, reads 148 bytes and fills in the nine
  # attributes.  Returns self.
  #
  # The integers are unpacked as 32-bit little-endian ('V') so that they are
  # read the same way as the little-endian floats ('e') regardless of the
  # byte order of the host.
  def from_io(fh)
    fh.pos = 0 if fh.pos != 0
    st = fh.read(148)
    (@start_time, @start_mass, @end_mass,
     @num_dta_files, @group_scan, @min_group_count, @min_ion_threshold,
     @start_scan, @end_scan) = st.unpack('x36ex12ex4ex48Vx12VVVVV')
    self
  end
end
382
+
383
+ # total_num_possible_charge_states is not correct under 3.5 (Bioworks 3.3.1)
384
+ # unknown is, well unknown...
385
+
386
Ms::Sequest::Srf::DTA = Arrayclass.new( %w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks) )

# One spectrum record of an .srf file: seven header values plus the raw peak
# bytes.
class Ms::Sequest::Srf::DTA
  # original
  # Unpack = "EeIvvvv"
  # (the integer is read as 32-bit little-endian, like the other fields)
  Unpack_32 = "EeVvvvv"
  Unpack_35 = "Ex8eVx2vvvv"

  # note on peaks (self[7])
  # this is a byte array of floats, you can get the peaks out with
  # unpack("e*")

  undef_method :inspect
  # short description that reports the size of the peak data, not its bytes
  def inspect
    peaks_st = self[7] ? "[#{self[7].size} bytes]" : 'nil'
    "<Ms::Sequest::Srf::DTA @mh=#{mh} @dta_tic=#{dta_tic} @num_peaks=#{num_peaks} @charge=#{charge} @ms_level=#{ms_level} @total_num_possible_charge_states=#{total_num_possible_charge_states} @peaks=#{peaks_st} >"
  end

  # Reads one record from fh (layout selected by unpack_35) and returns self.
  #
  # Consumes a fixed-size header, a spacer and then num_peaks * 8 bytes of
  # peak data, which is stored unparsed in self[7].
  def from_io(fh, unpack_35)
    if unpack_35
      @unpack = Unpack_35
      @read_header = 34
      @read_spacer = 22
    else
      @unpack = Unpack_32
      @read_header = 24
      @read_spacer = 24
    end

    st = fh.read(@read_header)
    # get the bulk of the data in single unpack
    self[0,7] = st.unpack(@unpack)

    # Scan numbers are given at the end in an index!
    fh.read(@read_spacer)

    # each peak is two 4-byte floats
    self[7] = fh.read(num_peaks * 8)
    self
  end

  # returns the record as the text of a .dta file: a "mh charge" line followed
  # by one "m/z intensity" line per peak, each terminated with \r\n
  def to_dta_file_data
    # formatted here directly: this class does not define a two-argument
    # round helper of its own
    string = "#{sprintf('%.6f', mh)} #{charge}\r\n"
    peaks.unpack('e*').each_slice(2) do |mz, intensity|
      # %d is equivalent to floor, so we round by adding 0.5!
      string << "#{sprintf('%.4f', mz)} #{(intensity + 0.5).floor}\r\n"
    end
    string
  end

  # write a class dta file to the io object
  def write_dta_file(io)
    io.print to_dta_file_data
  end

end
446
+
447
+
448
Ms::Sequest::Srf::Out = Arrayclass.new( %w(first_scan last_scan charge num_hits computer date_time hits total_inten lowest_sp num_matched_peptides db_locus_count) )

# 0=first_scan, 1=last_scan, 2=charge, 3=num_hits, 4=computer, 5=date_time, 6=hits, 7=total_inten, 8=lowest_sp, 9=num_matched_peptides, 10=db_locus_count

# One search-result record of an .srf file together with its peptide hits.
class Ms::Sequest::Srf::Out
  # num_hits, computer, date_time
  Unpack_32 = '@36vx2Z*@60Z*'
  Unpack_35 = '@36vx4Z*@62Z*'

  undef_method :inspect
  # short description that reports the number of hits, not the hits themselves
  def inspect
    hits_s = self[6] ? ", @hits(#)=#{hits.size}" : ''
    "<Ms::Sequest::Srf::Out first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
  end

  # Reads one record from fh (layout selected by unpack_35) and returns self.
  #
  # Fills in num_hits, computer, date_time and the four statistics fields from
  # the 96 byte record header, then reads num_hits peptide hits followed by
  # their extra protein references.  first_scan, last_scan and charge are not
  # set here.
  def from_io(fh, unpack_35)
    ## EMPTY out file is 96 bytes
    ## each hit is 320 bytes
    ## num_hits and charge:
    st = fh.read(96)

    self[3,3] = st.unpack( (unpack_35 ? Unpack_35 : Unpack_32) )
    # total_inten, lowest_sp, num_matched_peptides, db_locus_count
    # (integers read as 32-bit little-endian, like the floats)
    self[7,4] = st.unpack('@8eex4Vx4V')
    hit_count = self[3]

    pep_hits = Array.new(hit_count) do
      Ms::Sequest::Srf::Out::Pep.new.from_io(fh, unpack_35)
    end

    unless pep_hits.empty?
      # the extra references for all hits follow the last hit
      num_extra_references = pep_hits.inject(0) { |sum, pep| sum + pep.num_other_loci }
      Ms::Sequest::Srf::Out::Pep.read_extra_references(fh, num_extra_references, pep_hits)
      ## The xcorrs are already ordered by best to worst hit
      ## ADJUST the deltacn's to be meaningful for the top hit:
      ## (the same as bioworks and prophet)
      Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(pep_hits)
    end
    self[6] = pep_hits
    self
  end

end
498
+
499
+
500
+ # deltacn_orig - the one that sequest originally reports (top hit gets 0.0)
501
+ # deltacn - modified to be that of the next best hit (by xcorr) and the last
502
+ # hit takes 1.1. This is what is called deltacn by bioworks and pepprophet
503
+ # (at least for the first few years). If filtering occurs, it will be
504
+ # updated.
505
+ # deltacn_orig_updated - the latest updated value of deltacn.
506
+ # Originally, this will be equal to deltacn_orig. After filtering, this will
507
+ # be recalculated. To know if this will be different from deltacn_orig, query
508
+ # match.srf.filtered_by_precursor_mass_tolerance. If this is changed, then
509
+ # deltacn should also be changed to reflect it.
510
+ # mh - the theoretical mass + h
511
+ # prots are created as SRF prot objects with a reference and linked to their
512
+ # peptides (from global hash by reference)
513
+ # ppm = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]
514
+ # This is calculated for the M+H mass!
515
+ # num_other_loci is the number of other loci that the peptide matches beyond
516
+ # the first one listed
517
+ # srf = the srf object this scan came from
518
+
519
+
520
Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )

# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated

# One peptide hit of an out record.
class Ms::Sequest::Srf::Out::Pep
  #include SpecID::Pep

  # creates the deltacn that is meaningful for the top hit (the deltacn_orig
  # or the second best hit and so on).
  # assumes sorted
  # (an empty array is left alone)
  def self.set_deltacn_from_deltacn_orig(ar)
    return if ar.empty?
    (1...ar.size).each {|i| ar[i-1].deltacn = ar[i].deltacn_orig }
    ar[-1].deltacn = 1.1
  end

  # (assumes sorted)
  # recalculates deltacn from xcorrs and sets deltacn_orig_updated and deltacn
  def self.update_deltacns_from_xcorr(ar)
    return if ar.empty?
    top_score = ar.first[3]
    other_scores = ar[1..-1].map { |pep| 1.0 - (pep[3]/top_score) }
    ar.first[20] = 0.0
    other_scores.each_with_index do |score, i|
      ar[i][19] = score     # deltacn
      ar[i+1][20] = score   # deltacn_orig_updated
    end
    ar.last[19] = 1.1
  end

  # Reads num_extra_references records from fh.  Each record is read as 8
  # bytes holding the 1-based number of the hit it belongs to, followed by
  # 80 bytes of reference text.  A Prot built from the first 38 characters of
  # the reference is appended to that hit's prots.
  def self.read_extra_references(fh, num_extra_references, pep_hits)
    num_extra_references.times do
      # 80 bytes total (with index number)
      pep = pep_hits[fh.read(8).unpack('x4V').first - 1]

      ref = fh.read(80).unpack('A*').first
      pep[10] << Ms::Sequest::Srf::Out::Prot.new(ref[0,38])
    end
    #  fh.read(6) if unpack_35
  end

  # x2=???
  #Unpack_35 = '@64Ex8ex12eeIx22vx2vvx8Z*@246Z*'
  ### NOTE:
  # I need to verify that this is correct (I mean the 'I' after x18)
  # (integers are written as 'V': 32-bit little-endian, like the other fields)
  Unpack_35 = '@64Ex8ex12eeVx18Vvx2vvx8Z*@246Z*'
  # translation: @64=(64 bytes in to the record), E=mH, x8=8unknown bytes, e=deltacn,
  # x12=12unknown bytes, e=sp, e=xcorr, V=ID#, x18=18 unknown bytes,
  # V=num_other_loci, v=rsp, v=ions_matched, v=ions_total, x8=8unknown bytes,
  # Z*=sequence, @246Z*=at byte 246 grab the string (which is proteins).
  #Unpack_32 = '@64Ex8ex12eeIx18vvvx8Z*@240Z*'
  Unpack_32 = '@64Ex8ex12eeVx14Vvvvx8Z*@240Z*'
  Unpack_four_null_bytes = 'a*'
  Unpack_Zstar = 'Z*'
  Read_35 = 426
  Read_32 = 320

  FourNullBytes_as_string = "\0\0\0\0"
  #NewRecordStart = "\0\0" + 0x3a.chr + 0x1a.chr + "\0\0"
  NewRecordStart = 0x01.chr + 0x00.chr
  Sequest_record_start = "[SEQUEST]"

  undef_method :inspect
  # one-line description; prots is reported as a count and srf by its
  # base_name
  def inspect
    st = %w(aaseq sequence mh deltacn_orig sp xcorr id rsp ions_matched ions_total prots deltamass ppm base_name first_scan last_scan charge deltacn).map do |v|
      if v == 'prots'
        "#{v}(#)=#{send(v.to_sym).size}"
      else
        "#{v}=#{send(v.to_sym).inspect}"
      end
    end
    st.unshift("<#{self.class}")
    if srf
      st.push("srf(base_name)=#{srf.base_name.inspect}")
    end
    st.push('>')
    st.join(' ')
  end

  # Reads one hit from fh (layout selected by unpack_35) and returns self.
  #
  # Sets fields 0-9 from the record, wraps the reference in a one-element
  # prots array, copies deltacn_orig into deltacn_orig_updated and derives
  # aaseq from the sequence.
  def from_io(fh, unpack_35)
    unpack = unpack_35 ? Unpack_35 : Unpack_32

    ## get the first part of the info
    st = fh.read( (unpack_35 ? Read_35 : Read_32) )  ## read all the hit data

    # the unpack yields eleven values (ten fields plus the reference), so
    # eleven slots are replaced; the reference lands in slot 10 and the
    # record keeps its length
    self[0,11] = st.unpack(unpack)

    # set deltacn_orig_updated
    self[20] = self[1]

    # we are slicing the reference to 38 chars to be the same length as
    # duplicate references
    self[10] = [Ms::Sequest::Srf::Out::Prot.new(self[10][0,38])]

    self[13] = Ms::Id::Peptide.sequence_to_aaseq(self[9])

    fh.read(6) if unpack_35

    self
  end

end
631
+
632
+
633
Ms::Sequest::Srf::Out::Prot = Arrayclass.new( %w(reference peps) )

# A protein: its reference string and the peptide hits linked to it.
class Ms::Sequest::Srf::Out::Prot
  include Ms::Id::Protein
  ## we shouldn't have to do this because this is included in SpecID::Prot, but
  ## under some circumstances it won't work without explicitly calling it.
  #include ProteinReferenceable

  # warnings are switched off while initialize is redefined
  saved_verbose = $VERBOSE ; $VERBOSE = nil
  # reference and peps fill the two slots of the record
  def initialize(reference=nil, peps=[])
    super(self.class.size)
    self[0] = reference
    self[1] = peps
  end
  $VERBOSE = saved_verbose

  undef_method :inspect
  # one-line description; peps is reported as a count
  def inspect
    pep_count = peps.size
    "<Ms::Sequest::Srf::Out::Prot @reference=#{reference}, @peps(#)=#{pep_count}>"
  end
end
658
+
659
# A group of Srf searches.
class Ms::Sequest::SrfGroup
  include Ms::Id::SearchGroup

  # inherits an array of Ms::Sequest::Srf::Out::Pep objects
  # inherits an array of Ms::Sequest::Srf::Out::Prot objects

  # see Ms::Id::Search for acceptable arguments
  # (filename, filenames, array of objects)
  # opts =
  #     :filter_by_precursor_mass_tolerance => true | false (default true)
  #
  # The individual searches are always built with :link_protein_hits =>
  # false; unless the caller passed :link_protein_hits => false, the group's
  # peps are merged afterwards.  A given block is called with the group.
  def initialize(arg, opts={}, &block)
    link_group_hits = (opts[:link_protein_hits] != false)
    search_opts = opts.merge(:link_protein_hits => false)
    super(arg, search_opts) do
      if link_group_hits
        puts "MERGING GROUP!"
        (@peps, @prots) = merge!(@searches.map {|search| search.peps }) do |_prot, _peps|
          Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
        end
      end
    end
    block.call(self) if block_given?
  end

  # the class of the individual searches in this group
  def search_class
    Ms::Sequest::Srf
  end

  # writes one line per srf file name to srg_filename and returns
  # srg_filename
  # if an srf file exists, its name will be expanded to full path, otherwise
  # it is written as given
  def to_srg(srg_filename='bioworks.srg')
    File.open(srg_filename, 'w') do |out|
      @filenames.each do |srf_file|
        out.puts(File.exist?(srf_file) ? File.expand_path(srf_file) : srf_file)
      end
    end
    srg_filename
  end
end
703
+
704
+
705
+
706
+
707
+