ms-sequest 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/History ADDED
@@ -0,0 +1,8 @@
1
+ == 0.0.1 / 2009-05-11
2
+
3
+ * pulled out of mspire core
4
+
5
+ == 0.0.2 / 2009-05-14
6
+
7
+ * Basic SRF to SQT translation working
8
+ * SQT reading working
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2006 University of Texas at Austin, Regents of the University of
2
+ Colorado, and Howard Hughes Medical Institute.
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ = {Ms-Sequest}[http://mspire.rubyforge.org/projects/ms-sequest]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/jtprince/ms-sequest/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Sequest is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-sequest
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006 University of Texas at Austin
20
+ Copyright (c) Regents of the University of Colorado and Howard Hughes Medical Institute.
21
+ Developer:: {John Prince}, {Edward Marcotte Lab}[http://polaris.icmb.utexas.edu/home.html], {Natalie Ahn Lab}[http://www.colorado.edu/chem/people/ahnn.html], {Howard Hughes Medical Institute}[http://www.hhmi.org/], {BYU Dept. of Chemistry and Biochemistry}[http://www.chem.byu.edu/]
22
+ Support::
23
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
data/lib/ms/sequest.rb ADDED
@@ -0,0 +1,6 @@
1
+
2
# Top-level namespace shared by the Mspire libraries.
module Ms
  # Namespace for SEQUEST, Bioworks and SQT support.
  module Sequest
    # Version of the ms-sequest gem (frozen so it cannot be mutated in place).
    VERSION = '0.0.2'.freeze
  end
end
@@ -0,0 +1,343 @@
1
+ require 'ms/mass/aa'
2
+
3
+ # In the future, this guy should accept any version of bioworks params file
4
+ # and spit out any param queried.
5
+
6
module Ms ; end
module Ms::Sequest ; end

# Reader and simple parameter lookup for SEQUEST params files, supporting
# Bioworks 3.1-3.3.1.  The same reader is used on the [SEQUEST] section found
# inside an .srf file.
#
#     params = Ms::Sequest::Params.new("sequest.params")  # filename by default
#     params = Ms::Sequest::Params.new.parse_io(some_io_object)
#
#     params.some_parameter         # => any parameter defined has a method
#     params.nonexistent_parameter  # => nil
#
# It also gives consistent access, across versions, to the important values:
#
#     # some basic methods shared by all versions:
#     params.version              # => '3.1' | '3.2' | '3.3'
#     params.enzyme               # => enzyme name with no parentheses
#     params.min_number_termini
#     params.database             # => first_database_name
#     params.enzyme_specificity   # => [offset, cleave_at, except_if_after]
#     params.precursor_mass_type  # => "average" | "monoisotopic"
#     params.fragment_mass_type   # => "average" | "monoisotopic"
#
#     # some backwards/forwards compatibility methods:
#     params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
#     params.fragment_ion_tol            # => fragment_ion_tolerance
#
# All keys and values are stored as strings.
class Ms::Sequest::Params

  # Enzyme table used by Bioworks 3.1, indexed by 'enzyme_number'.
  # Each row is [name, offset, cleave_at, except_if_after].
  Bioworks31_Enzyme_Info_Array = [
    ['No_Enzyme', 0, '-', '-'],                     # 0
    ['Trypsin', 1, 'KR', '-'],                      # 1
    ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],            # 2
    ['Chymotrypsin', 1, 'FWYL', '-'],               # 3
    ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],           # 4
    ['Clostripain', 1, 'R', '-'],                   # 5
    ['Cyanogen_Bromide', 1, 'M', '-'],              # 6
    ['IodosoBenzoate', 1, 'W', '-'],                # 7
    ['Proline_Endopept', 1, 'P', '-'],              # 8
    ['Staph_Protease', 1, 'E', '-'],                # 9
    ['Trypsin_K', 1, 'K', 'P'],                     # 10
    ['Trypsin_R', 1, 'R', 'P'],                     # 11
    ['GluC', 1, 'ED', '-'],                         # 12
    ['LysC', 1, 'K', '-'],                          # 13
    ['AspN', 0, 'D', '-'],                          # 14
    ['Elastase', 1, 'ALIV', 'P'],                   # 15
    ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],   # 16
  ]

  # separates a parameter name from its value ("name = value")
  @@param_re = / = ?/o
  # separates a value from its trailing comment ("value ; comment")
  @@param_two_split = ';'
  # marks the start of the params section
  @@sequest_line = /\[SEQUEST\]/o

  # the general options
  attr_accessor :opts
  # the static weights added to amino acids
  attr_accessor :mods

  # all keys and values stored as strings!
  # will accept a sequest.params file or .srf file
  #
  # With no file, the object starts with empty opts and mods so that
  # parameter lookups return nil rather than failing.
  def initialize(file=nil)
    @opts = {}
    @mods = {}
    parse_file(file) if file
  end

  # Reads "name = value ; comment" lines from fh and returns them as a hash
  # of name => value strings.  Reading stops at the first line after the
  # block of add_* lines; the handle is rewound to the start of that line.
  #
  # Lines with no " = " separator (e.g. pure comment lines) are skipped, and
  # only the first separator splits, so a value may itself contain " = ".
  def grab_params(fh)
    hash = {}
    in_add_amino_acid_section = false
    add_section_re = /^\s*add_/
    prev_pos = nil
    while (line = fh.gets)
      if line =~ add_section_re
        in_add_amino_acid_section = true
      elsif in_add_amino_acid_section
        # first line past the add_* block: un-read it and stop
        fh.pos = prev_pos
        break
      end
      prev_pos = fh.pos
      next unless line =~ /\w+/
      name, value = line.split(@@param_re, 2)
      next if value.nil?   # no "name = value" structure on this line
      value = value.split(@@param_two_split, 2).first.to_s
      hash[name.strip] = value.rstrip
    end
    hash
  end

  # Seeks to the [SEQUEST] section of fh (the line after it must start with
  # first_database_name), reads the parameters into opts, moves the add_*
  # entries into mods and strips a trailing ".hdr" from first_database_name.
  #
  # Raises ArgumentError if no such section is found before end of file.
  # returns self
  def parse_io(fh)
    found = false
    while (line = fh.gets)
      next unless line =~ @@sequest_line
      # double check that we are in a sequest params file:
      pos = fh.pos
      following = fh.gets
      fh.pos = pos
      if following && following =~ /^first_database_name/
        found = true
        break
      end
    end
    unless found
      raise ArgumentError, "no [SEQUEST] params section (followed by first_database_name) found"
    end

    @opts = grab_params(fh)
    @opts["search_engine"] = "SEQUEST"
    # extract out the mods (iterate over a snapshot of the keys so the hash
    # is not modified while it is being traversed)
    @mods = {}
    @opts.keys.each do |k|
      @mods[k] = @opts.delete(k) if k =~ /^add_/
    end

    ## this gets rid of the .hdr postfix on indexed databases
    db = @opts["first_database_name"]
    @opts["first_database_name"] = db.sub(/\.hdr$/, '') if db
    self
  end

  ## parses file
  ## and drops the .hdr behind indexed fasta files
  ## returns self
  ## can read sequest.params file or .srf file
  ## (opened in binary mode because .srf files are binary)
  def parse_file(file)
    File.open(file, 'rb') do |fh|
      parse_io(fh)
    end
    self
  end

  # returns( offset, cleave_at, except_if_after )
  # offset is an Integer specifying how far after an amino acid to cut
  # cleave_at is a string of all amino acids that should be cut at
  # except_if_after for not cutting after those
  # normal tryptic behavior would be: [1, 'KR', 'P']
  # NOTE: a '-' in a params file is returned as an '' (empty string)
  # AspN is [0,'D','']
  #
  # Raises ArgumentError when the version cannot be determined.
  def enzyme_specificity
    v = version
    enzyme_ar =
      if v == '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
      elsif v && v >= '3.2'
        arr = @opts['enzyme_info'].split(/\s+/)[2,3]
        arr[0] = arr[0].to_i
        arr
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    enzyme_ar.map {|str| str == '-' ? '' : str }
  end

  # Returns the version of the sequest.params file
  # Returns String "3.3" if contains "fragment_ion_units"
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil otherwise
  def version
    if @opts['fragment_ion_units'] ; '3.3'
    elsif @opts['enzyme_info'] ; '3.2'
    elsif @opts['enzyme_number'] ; '3.1'
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # I'm not sure if this is the right mapping for sequence_search_constraint?
  # returns 'partial_sequence', or "0" when it is missing or empty
  def sequence
    pseq = @opts['partial_sequence']
    if !pseq || pseq == "" ; pseq = "0" end
    pseq
  end

  # "average" | "monoisotopic" from 'mass_type_parent' ('0' | '1').
  # NOTE: any other value aborts the process.
  def precursor_mass_type
    case @opts['mass_type_parent']
    when '0' ; "average"
    when '1' ; "monoisotopic"
    else ; abort "error in mass_type_parent in sequest!"
    end
  end

  # "average" | "monoisotopic" from 'mass_type_fragment' ('0' | '1').
  # NOTE: any other value aborts the process.
  def fragment_mass_type
    case @opts['mass_type_fragment']
    when '0' ; "average"
    when '1' ; "monoisotopic"
    else ; abort "error in mass_type_fragment in sequest!"
    end
  end

  # Any parameter read from the file is available as a method; anything
  # else returns nil.
  def method_missing(name, *args)
    string = name.to_s
    opts = @opts || {}
    mods = @mods || {}
    if opts.key?(string) ; opts[string]
    elsif mods.key?(string) ; mods[string]
    end
  end

  # keeps respond_to? truthful about the parameters served by method_missing
  def respond_to_missing?(name, include_private=false)
    string = name.to_s
    (@opts || {}).key?(string) || (@mods || {}).key?(string) || super
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/)[-1]
  end

  # changes the path of the database
  def database_path=(newpath)
    db = @opts["first_database_name"]
    newpath = File.join(newpath, _sys_ind_basename(db))
    @opts["first_database_name"] = newpath
  end

  # the first database name
  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table from Ms::Mass::AA
  # based_on may be :precursor or :fragment (anything else returns nil)
  def mass_index(based_on=:precursor)
    reply =
      case based_on
      when :precursor then precursor_mass_type
      when :fragment then fragment_mass_type
      end
    case reply
    when 'average'
      Ms::Mass::AA::AVG
    when 'monoisotopic'
      Ms::Mass::AA::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # Anything else (or no enzyme_info at all) warns and returns "1".
  def min_number_termini
    if (e_info = @opts["enzyme_info"])
      case e_info.split(" ")[1]
      when "1" then return "2"
      when "2" then return "1"
      end
    end
    warn "No Enzyme termini info, using min_number_termini = '1'"
    "1"
  end

  ## returns a SampleEnzyme object
  #def sample_enzyme
  #  (offset, cleave_at, except_if_after) = enzyme_specificity.map do |v|
  #    if v == '' ; nil ; else v end
  #  end
  #  SampleEnzyme.new do |se|
  #    se.name = self.enzyme
  #    se.cut = cleave_at
  #    se.no_cut = except_if_after
  #    se.sense =
  #      if se.name == "No_Enzyme"
  #        nil
  #      elsif offset == 1
  #        'C'
  #      elsif offset == 0
  #        'N'
  #      end
  #  end
  #end

  # returns the enzyme name (but no parentheses connected with the name).
  # this will likely be capitalized.
  # returns nil when no enzyme information is available.
  def enzyme
    v = version
    basic_name =
      if v == '3.1'
        Bioworks31_Enzyme_Info_Array[ @opts['enzyme_number'].to_i ][0]
      elsif v && v >= '3.2'
        @opts["enzyme_info"]
      end
    return nil unless basic_name
    # enzyme_info looks like "Trypsin(KR) 1 1 KR -": keep only the first
    # token, then drop any parenthesized suffix
    basic_name.split(' ', 2).first.to_s.split('(', 2).first
  end

  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # my take on peptide_mass_units:
  # (see http://www.ionsource.com/tutorial/isotopes/slide2.htm)
  # amu = atomic mass units = (mass_real - mass_measured).abs (??abs??)
  # mmu = milli mass units (amu / 1000)
  # ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured  [ where ∆m_accuracy = mass_real – mass_measured ]

  # returns 'peptide_mass_tolerance' unchanged, printing a warning when
  # 'peptide_mass_units' is anything but "0"
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      puts "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  def max_num_differential_AA_per_mod
    @opts["max_num_differential_AA_per_mod"] || @opts["max_num_differential_per_peptide"]
  end

  # returns a hash by add_<whatever> of any static mods != 0
  # the values are still as strings
  def static_mods
    hash = {}
    @mods.each do |k,v|
      hash[k] = v unless v.to_f == 0.0
    end
    hash
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

end
343
+
@@ -0,0 +1,363 @@
1
+
2
+ require 'ms/fasta'
3
+ require 'arrayclass'
4
+ require 'set'
5
+
6
+ require 'ms/id/peptide'
7
+ require 'ms/id/search'
8
+
9
+ module Ms
10
+ module Sequest
11
+ class SqtGroup
12
+ include Ms::Id::SearchGroup
13
+
14
+ #attr_accessor :sqts, :filenames
15
+
16
+ def search_class
17
+ Ms::Sequest::Sqt
18
+ end
19
+
20
+ def extension() 'sqg' end
21
+
22
+ def initialize(arg, opts={}, &block)
23
+ orig_opts = opts.dup
24
+ indiv_opts = { :link_protein_hits => false }
25
+ super(arg, opts.merge(indiv_opts)) do
26
+ unless orig_opts[:link_protein_hits] == false
27
+ puts "MERGING GROUP!"
28
+ (@peps, @prots) = merge!(@searches.map {|v| v.peps }, &Ms::Sequest::Sqt::NEW_PROT)
29
+ end
30
+ end
31
+ block.call(self) if block_given?
32
+ end
33
+
34
+
35
+ # # NOTE THAT this is copy/paste from srf.rb, should be refactored...
36
+ ## returns the filename used
37
+ ## if the file exists, the name will be expanded to full path, otherwise just
38
+ ## what is given
39
+ #def to_sqg(sqg_filename='bioworks.sqg')
40
+ #File.open(sqg_filename, 'w') do |v|
41
+ #@filenames.each do |sqt_file|
42
+ #if File.exist? sqt_file
43
+ #v.puts File.expand_path(sqt_file)
44
+ #else
45
+ #v.puts sqt_file
46
+ #end
47
+ #end
48
+ #end
49
+ #sqg_filename
50
+ #end
51
+
52
+ end # SqtGroup
53
+
54
+
55
+ class Sqt
56
+ include Ms::Id::Search
57
+ PercolatorHeaderMatch = /^Percolator v/
58
+ Delimiter = "\t"
59
+ attr_accessor :header
60
+ attr_accessor :spectra
61
+ attr_accessor :base_name
62
+ # boolean
63
+ attr_accessor :percolator_results
64
+
65
+ # assumes the file exists and is readable
66
+ # returns [DBSeqLength, DBLocusCount, DBMD5Sum] or nil if no file
67
+ def self.get_db_info(dbfile)
68
+ Ms::Fasta.open(dbfile) do |fasta|
69
+ [fasta.total_sequence_length, fasta.size, fasta.md5_sum]
70
+ end
71
+ end
72
+
73
+ def protein_class
74
+ Ms::Sequest::Sqt::Locus
75
+ end
76
+
77
+ # opts =
78
+ # :percolator_results => false | true (default false)
79
+ # :link_protein_hits => true | false (default true)
80
+ def initialize(filename=nil, opts={})
81
+ @peps = []
82
+ @prots = []
83
+ if filename
84
+ from_file(filename, opts)
85
+ end
86
+ end
87
+
88
+ NEW_PROT = lambda do |_prot, _peps|
89
+ Ms::Sequest::Sqt::Locus.new([_prot.locus, _prot.description, _peps])
90
+ end
91
+
92
+ # if the file contains the header key '/$Percolator v/' then the results
93
+ # will be interpreted as percolator results regardless of the value
94
+ # passed in.
95
+ def from_file(filename, opts={})
96
+ opts = {:percolator_results=>false, :link_protein_hits => true}.merge(opts)
97
+ @percolator_results = opts[:percolator_results]
98
+ @base_name = File.basename( filename.gsub('\\','/') ).sub(/\.\w+$/, '')
99
+ File.open(filename) do |fh|
100
+ @header = Ms::Sequest::Sqt::Header.new.from_handle(fh)
101
+ if @header.keys.any? {|v| v =~ PercolatorHeaderMatch }
102
+ @percolator_results = true
103
+ end
104
+ (@spectra, @peps) = Ms::Sequest::Sqt::Spectrum.spectra_from_handle(fh, @base_name, @percolator_results)
105
+ end
106
+ if opts[:link_protein_hits]
107
+ (@peps, @prots) = merge!([@peps], &NEW_PROT)
108
+ end
109
+ end
110
+
111
+
112
+ # Inherits from hash, so all header stuff can be accessed by key. Multiline
113
+ # values will be pushed into an array.
114
+ # All header values are stored as (newline-removed) strings!
115
+ class Header < Hash
116
+ Leader = 'H'
117
+
118
+ # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
119
+ # Any other keys repeated will be shoved into an array; otherwise a string
120
+ Arrayed = %w(DyanmicMod StaticMod Comment).to_set
121
+
122
+ HeaderKeys = {
123
+ :sqt_generator => 'SQTGenerator',
124
+ :sqt_generator_version => 'SQTGeneratorVersion',
125
+ :database => 'Database',
126
+ :fragment_masses => 'FragmentMasses',
127
+ :precursor_masses => 'PrecursorMasses',
128
+ :start_time => 'StartTime',
129
+ :db_seq_length => 'DBSeqLength',
130
+ :db_locus_count => 'DBLocusCount',
131
+ :db_md5sum => 'DBMD5Sum',
132
+ :peptide_mass_tolerance => 'Alg-PreMassTol',
133
+ :fragment_ion_tolerance => 'Alg-FragMassTol',
134
+ # nonstandard (mine)
135
+ :peptide_mass_units => 'Alg-PreMassUnits',
136
+ :ion_series => 'Alg-IonSeries',
137
+ :enzyme => 'Alg-Enzyme',
138
+ # nonstandard (mine)
139
+ :ms_model => 'Alg-MSModel',
140
+ :static_mods => 'StaticMod',
141
+ :dynamic_mods => 'DynamicMod',
142
+ :comments => 'Comment'
143
+ }
144
+
145
+
146
+ KeysToAtts = HeaderKeys.invert
147
+
148
+ HeaderKeys.keys.each do |ky|
149
+ attr_accessor ky
150
+ end
151
+
152
+ def from_handle(fh)
153
+ Arrayed.each do |ky|
154
+ self[ky] = []
155
+ end
156
+ pos = fh.pos
157
+ lines = []
158
+ loop do
159
+ line = fh.gets
160
+ if line && (line[0,1] == Ms::Sequest::Sqt::Header::Leader )
161
+ lines << line
162
+ else # reset the fh.pos and we're done
163
+ fh.pos = pos
164
+ break
165
+ end
166
+ pos = fh.pos
167
+ end
168
+ from_lines(lines)
169
+ end
170
+
171
+ def from_lines(array_of_header_lines)
172
+ array_of_header_lines.each do |line|
173
+ line.chomp!
174
+ (ky, *rest) = line.split(Ms::Sequest::Sqt::Delimiter)[1..-1]
175
+ # just in case they have any tabs in their field
176
+ value = rest.join(Ms::Sequest::Sqt::Delimiter)
177
+ if Arrayed.include?(ky)
178
+ self[ky] << value
179
+ elsif self.key? ky # already exists
180
+ if self[ky].is_a? Array
181
+ self[ky] << value
182
+ else
183
+ self[ky] = [self[ky], value]
184
+ end
185
+ else # normal
186
+ self[ky] = value
187
+ end
188
+ end
189
+ KeysToAtts.each do |ky,methd|
190
+ self.send("#{methd}=".to_sym, self[ky])
191
+ end
192
+ self
193
+ end
194
+
195
+ end
196
+ end
197
+ end
198
+ end
199
+
200
+ # all are cast as expected (total_intensity is a float)
201
+ # mh = observed mh
202
+ Ms::Sequest::Sqt::Spectrum = Arrayclass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])
203
+
204
+ # 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
205
+
206
+ class Ms::Sequest::Sqt::Spectrum
207
+ Leader = 'S'
208
+
209
+ # assumes the first line starts with an 'S'
210
+ def self.spectra_from_handle(fh, base_name, percolator_results=false)
211
+ peps = []
212
+ spectra = []
213
+
214
+ while line = fh.gets
215
+ case line[0,1]
216
+ when Ms::Sequest::Sqt::Spectrum::Leader
217
+ spectrum = Ms::Sequest::Sqt::Spectrum.new.from_line( line )
218
+ spectra << spectrum
219
+ matches = []
220
+ spectrum.matches = matches
221
+ when Ms::Sequest::Sqt::Match::Leader
222
+ match_klass = if percolator_results
223
+ Ms::Sequest::Sqt::Match::Percolator
224
+ else
225
+ Ms::Sequest::Sqt::Match
226
+ end
227
+ match = match_klass.new.from_line( line )
228
+ match[10,3] = spectrum[0,3]
229
+ match[15] = base_name
230
+ matches << match
231
+ peps << match
232
+ loci = []
233
+ match.loci = loci
234
+ matches << match
235
+ when Ms::Sequest::Sqt::Locus::Leader
236
+ line.chomp!
237
+ key = line.split(Ms::Sequest::Sqt::Delimiter)[1]
238
+ locus = Ms::Sequest::Sqt::Locus.new.from_line( line )
239
+ loci << locus
240
+ end
241
+ end
242
+ # set the deltacn:
243
+ set_deltacn(spectra)
244
+ [spectra, peps]
245
+ end
246
+
247
+ def self.set_deltacn(spectra)
248
+ spectra.each do |spec|
249
+ matches = spec.matches
250
+ if matches.size > 0
251
+
252
+ (0...(matches.size-1)).each do |i|
253
+ matches[i].deltacn = matches[i+1].deltacn_orig
254
+ end
255
+ matches[-1].deltacn = 1.1
256
+ end
257
+ end
258
+ spectra
259
+ end
260
+
261
+
262
+ # returns an array -> [the next spectra line (or nil if eof), spectrum]
263
+ def from_line(line)
264
+ line.chomp!
265
+ ar = line.split(Ms::Sequest::Sqt::Delimiter)
266
+ self[0] = ar[1].to_i
267
+ self[1] = ar[2].to_i
268
+ self[2] = ar[3].to_i
269
+ self[3] = ar[4].to_f
270
+ self[4] = ar[5]
271
+ self[5] = ar[6].to_f
272
+ self[6] = ar[7].to_f
273
+ self[7] = ar[8].to_f
274
+ self[8] = ar[9].to_i
275
+ self[9] = []
276
+ self
277
+ end
278
+ end
279
+
280
+ # Sqt format uses only indices 0 - 9
281
+ Ms::Sequest::Sqt::Match = Arrayclass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])
282
+
283
+ # 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
284
+
285
+ # rxcorr = rank by xcorr
286
+ # rsp = rank by sp
287
+ # NOTE:
288
+ # deltacn_orig
289
+ # deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
290
+ # give the last one 1.1)
291
+ class Ms::Sequest::Sqt::Match
292
+ Leader = 'M'
293
+
294
+ # same as 'loci'
295
+ def prots
296
+ self[16]
297
+ end
298
+
299
+ def from_line(line)
300
+ line.chomp!
301
+ ar = line.split(Ms::Sequest::Sqt::Delimiter)
302
+ self[0] = ar[1].to_i
303
+ self[1] = ar[2].to_i
304
+ self[2] = ar[3].to_f
305
+ self[3] = ar[4].to_f
306
+ self[4] = ar[5].to_f
307
+ self[5] = ar[6].to_f
308
+ self[6] = ar[7].to_i
309
+ self[7] = ar[8].to_i
310
+ self[8] = ar[9]
311
+ self[9] = ar[10]
312
+ self[14] = Ms::Id::Peptide.sequence_to_aaseq(self[8])
313
+ self
314
+ end
315
+ end
316
+
317
+
318
+ class Ms::Sequest::Sqt::Match::Percolator < Ms::Sequest::Sqt::Match
319
+ # we will keep access to these old terms since we can then access routines
320
+ # that sort on xcorr...
321
+ #undef_method :xcorr
322
+ #undef_method :xcorr=
323
+ #undef_method :sp
324
+ #undef_method :sp=
325
+
326
+ def percolator_score
327
+ self[4]
328
+ end
329
+ def percolator_score=(score)
330
+ self[4] = score
331
+ end
332
+ def negative_q_value
333
+ self[5]
334
+ end
335
+ def negative_q_value=(arg)
336
+ self[5] = arg
337
+ end
338
+ def q_value
339
+ -self[5]
340
+ end
341
+ # for compatibility with scripts that want this guy
342
+ def probability
343
+ -self[5]
344
+ end
345
+ end
346
+
347
+ Ms::Sequest::Sqt::Locus = Arrayclass.new(%w[locus description peps])
348
+
349
+ class Ms::Sequest::Sqt::Locus
350
+ Leader = 'L'
351
+
352
+ def first_entry ; self[0] end
353
+ def reference ; self[0] end
354
+
355
+ def from_line(line)
356
+ line.chomp!
357
+ ar = line.split(Ms::Sequest::Sqt::Delimiter)
358
+ self[0] = ar[1]
359
+ self[1] = ar[2]
360
+ self
361
+ end
362
+
363
+ end