ms-sequest 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History ADDED
@@ -0,0 +1,8 @@
1
+ == 0.0.1 / 2009-05-11
2
+
3
+ * pulled out of mspire core
4
+
5
+ == 0.0.2 / 2009-05-14
6
+
7
+ * Basic SRF to SQT translation working
8
+ * SQT reading working
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2006 University of Texas at Austin, Regents of the University of
2
+ Colorado, and Howard Hughes Medical Institute.
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ = {Ms-Sequest}[http://mspire.rubyforge.org/projects/ms-sequest]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/jtprince/ms-sequest/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Sequest is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-sequest
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006 University of Texas at Austin
20
+ Copyright (c) Regents of the University of Colorado and Howard Hughes Medical Institute.
21
+ Developer:: {John Prince}, {Edward Marcotte Lab}[http://polaris.icmb.utexas.edu/home.html], {Natalie Ahn Lab}[http://www.colorado.edu/chem/people/ahnn.html], {Howard Hughes Medical Institute}[http://www.hhmi.org/], {BYU Dept. of Chemistry and Biochemistry}[http://www.chem.byu.edu/]
22
+ Support::
23
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
data/lib/ms/sequest.rb ADDED
@@ -0,0 +1,6 @@
1
+
2
# Namespace for the mspire SEQUEST support library.
module Ms
  module Sequest
    # version of the ms-sequest gem
    VERSION = '0.0.2'
  end
end
@@ -0,0 +1,343 @@
1
+ require 'ms/mass/aa'
2
+
3
+ # In the future, this guy should accept any version of bioworks params file
4
+ # and spit out any param queried.
5
+
6
module Ms ; end
module Ms::Sequest ; end

# Reader and simple parameter lookup for SEQUEST params files, supporting
# Bioworks 3.1-3.3.1.  The params may come from a sequest.params file or from
# the params section embedded in an .srf file.
#
#     params = Ms::Sequest::Params.new("sequest.params")  # filename by default
#     params = Ms::Sequest::Params.new.parse_io(some_io_object)
#
#     params.some_parameter  # => any parameter defined has a method
#     params.nonexistent_parameter  # => nil
#
# Provides consistent access to the important info across versions:
#
#     # some basic methods shared by all versions:
#     params.version              # => '3.1' | '3.2' | '3.3'
#     params.enzyme               # => enzyme name with no parentheses
#     params.min_number_termini
#     params.database             # => first_database_name
#     params.enzyme_specificity   # => [offset, cleave_at, except_if_after]
#     params.precursor_mass_type  # => "average" | "monoisotopic"
#     params.fragment_mass_type   # => "average" | "monoisotopic"
#
#     # some backwards/forwards compatibility methods:
#     params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
#     params.fragment_ion_tol            # => fragment_ion_tolerance
#
class Ms::Sequest::Params

  # Bioworks 3.1 only stores an enzyme_number; this table (indexed by that
  # number) gives [name, offset, cleave_at, except_if_after]
  Bioworks31_Enzyme_Info_Array = [
    ['No_Enzyme', 0, '-', '-'],   # 0
    ['Trypsin', 1, 'KR', '-'],  # 1
    ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],  # 2
    ['Chymotrypsin', 1, 'FWYL', '-'],  # 3
    ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],  # 4
    ['Clostripain', 1, 'R', '-'],  # 5
    ['Cyanogen_Bromide', 1, 'M', '-'],  # 6
    ['IodosoBenzoate', 1, 'W', '-'],  # 7
    ['Proline_Endopept', 1, 'P', '-'],  # 8
    ['Staph_Protease', 1, 'E', '-'],  # 9
    ['Trypsin_K', 1, 'K', 'P'],  # 10
    ['Trypsin_R', 1, 'R', 'P'],  # 11
    ['GluC', 1, 'ED', '-'],  # 12
    ['LysC', 1, 'K', '-'],  # 13
    ['AspN', 0, 'D', '-'],  # 14
    ['Elastase', 1, 'ALIV', 'P'],  # 15
    ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],  # 16
  ]

  # separates a parameter name from its value
  @@param_re = / = ?/
  # separates a parameter value from its trailing comment
  @@param_two_split = ';'
  # marks the start of the params section
  @@sequest_line = /\[SEQUEST\]/

  # the general options
  attr_accessor :opts
  # the static weights added to amino acids
  attr_accessor :mods

  # all keys and values stored as strings!
  # will accept a sequest.params file or .srf file
  def initialize(file=nil)
    # start with empty hashes so lookups work on a not-yet-parsed object
    @opts = {}
    @mods = {}
    parse_file(file) if file
  end

  # returns hash of params up until the end of the add_<amino_acid> section.
  # The handle is left positioned right after the last add_ line.
  # Lines that carry no " = " separator (e.g. comment lines) are skipped.
  def grab_params(fh)
    hash = {}
    in_add_amino_acid_section = false
    add_section_re = /^\s*add_/
    prev_pos = nil
    while (line = fh.gets)
      is_add_line = (line =~ add_section_re)
      in_add_amino_acid_section = true if is_add_line
      if in_add_amino_acid_section && !is_add_line
        # first line past the add_ section: rewind to just before it and stop
        fh.pos = prev_pos
        break
      end
      prev_pos = fh.pos
      next unless line =~ /\w+/
      # limit of 2 keeps any later " = " inside the value/comment intact
      key, value = line.split(@@param_re, 2)
      next if value.nil?
      hash[key] = value.split(@@param_two_split, 2).first.to_s.rstrip
    end
    hash
  end

  # Reads the params section from an io object.
  # returns self
  # raises ArgumentError if no [SEQUEST] section (immediately followed by a
  # first_database_name line) is found before the end of the io
  def parse_io(fh)
    # seek to the SEQUEST section
    found = false
    while (line = fh.gets)
      next unless line =~ @@sequest_line
      # double check that we are in a sequest params section:
      pos = fh.pos
      following = fh.gets
      # always rewind so the peeked line is examined again on the next pass
      fh.pos = pos
      if following && following =~ /^first_database_name/
        found = true
        break
      end
    end
    raise ArgumentError, "no [SEQUEST] params section found" unless found

    # split the static mods (add_*) from the general options
    @opts = {}
    @mods = {}
    grab_params(fh).each do |k,v|
      (k =~ /^add_/ ? @mods : @opts)[k] = v
    end
    @opts["search_engine"] = "SEQUEST"

    ## this gets rid of the .hdr postfix on indexed databases
    if (db = @opts["first_database_name"])
      @opts["first_database_name"] = db.sub(/\.hdr$/, '')
    end
    self
  end

  ## parses file
  ## and drops the .hdr behind indexed fasta files
  ## returns self
  ## can read sequest.params file or .srf file
  def parse_file(file)
    File.open(file) do |fh|
      parse_io(fh)
    end
    self
  end

  # returns( offset, cleave_at, except_if_after )
  # offset is an Integer specifying how far after an amino acid to cut
  # cleave_at is a string of all amino acids that should be cut at
  # except_if_after for not cutting after those
  # normal tryptic behavior would be: [1, 'KR', 'P']
  # NOTE: a '-' in a params file is returned as an '' (empty string)
  # AspN is [0,'D','']
  # raises ArgumentError if the version cannot be determined
  def enzyme_specificity
    enzyme_ar =
      case version
      when '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
      when '3.2', '3.3'
        arr = enzyme_info.split(' ')[2,3]
        arr[0] = arr[0].to_i
        arr
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    enzyme_ar.map {|str| str == '-' ? '' : str }
  end

  # Returns the version of the sequest.params file
  # Returns String "3.3" if contains "fragment_ion_units"
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil otherwise
  def version
    if @opts['fragment_ion_units'] then '3.3'
    elsif @opts['enzyme_info'] then '3.2'
    elsif @opts['enzyme_number'] then '3.1'
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # I'm not sure if this is the right mapping for sequence_search_constraint?
  # returns partial_sequence, or "0" when it is missing or empty
  def sequence
    pseq = @opts['partial_sequence']
    (pseq.nil? || pseq == "") ? "0" : pseq
  end

  # returns "average" or "monoisotopic"
  # NOTE: aborts the process on any other mass_type_parent value
  def precursor_mass_type
    case @opts['mass_type_parent']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_parent in sequest!"
    end
  end

  # returns "average" or "monoisotopic"
  # NOTE: aborts the process on any other mass_type_fragment value
  def fragment_mass_type
    case @opts['mass_type_fragment']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_fragment in sequest!"
    end
  end

  # any parameter read from the file is available as a method; unknown
  # names return nil
  def method_missing(name, *args)
    key = name.to_s
    if @opts && @opts.key?(key) then @opts[key]
    elsif @mods && @mods.key?(key) then @mods[key]
    end
  end

  # keeps respond_to? in agreement with method_missing for stored params
  def respond_to_missing?(name, include_private=false)
    key = name.to_s
    return true if @opts && @opts.key?(key)
    return true if @mods && @mods.key?(key)
    super
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/)[-1]
  end

  # changes the path of the database (the basename is kept)
  def database_path=(newpath)
    db = @opts["first_database_name"]
    @opts["first_database_name"] = File.join(newpath, _sys_ind_basename(db))
  end

  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table from Ms::Mass::AA
  # based_on may be :precursor or :fragment (anything else returns nil)
  def mass_index(based_on=:precursor)
    reply =
      case based_on
      when :precursor then precursor_mass_type
      when :fragment then fragment_mass_type
      end
    case reply
    when 'average'
      Ms::Mass::AA::AVG
    when 'monoisotopic'
      Ms::Mass::AA::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # anything else warns and falls back to "1"
  def min_number_termini
    if (e_info = @opts["enzyme_info"])
      case e_info.split(" ")[1]
      when "1" then return "2"
      when "2" then return "1"
      end
    end
    warn "No Enzyme termini info, using min_number_termini = '1'"
    "1"
  end

  ## returns a SampleEnzyme object
  #def sample_enzyme
  #  (offset, cleave_at, except_if_after) = enzyme_specificity.map do |v|
  #    if v == '' ; nil ; else v end
  #  end
  #  SampleEnzyme.new do |se|
  #    se.name = self.enzyme
  #    se.cut = cleave_at
  #    se.no_cut = except_if_after
  #    se.sense =
  #      if se.name == "No_Enzyme"
  #        nil
  #      elsif offset == 1
  #        'C'
  #      elsif offset == 0
  #        'N'
  #      end
  #  end
  #end

  # returns the enzyme name (but no parentheses connected with the name).
  # this will likely be capitalized.
  # returns nil if the version (and so the enzyme) cannot be determined
  def enzyme
    basic_name =
      case version
      when '3.1'
        Bioworks31_Enzyme_Info_Array[ @opts['enzyme_number'].to_i ][0]
      when '3.2', '3.3'
        # enzyme_info is "<name> <stringency> <offset> <cleave_at> <except>";
        # the name is the first token (it may or may not carry parentheses)
        @opts["enzyme_info"].to_s.split(' ').first
      end
    basic_name && basic_name.split('(')[0]
  end

  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # my take on peptide_mass_units:
  # (see http://www.ionsource.com/tutorial/isotopes/slide2.htm)
  # amu = atomic mass units = (mass_real - mass_measured).abs  (??abs??)
  # mmu = milli mass units (amu / 1000)
  # ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured  [ where ∆m_accuracy = mass_real – mass_measured ]

  # returns peptide_mass_tolerance as read; warns (on stderr, like
  # min_number_termini) when the units are not "0"
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      warn "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  def max_num_differential_AA_per_mod
    @opts["max_num_differential_AA_per_mod"] || @opts["max_num_differential_per_peptide"]
  end

  # returns a hash by add_<whatever> of any static mods != 0
  # the values are still as strings
  def static_mods
    hash = {}
    @mods.each do |k,v|
      hash[k] = v if v.to_f != 0.0
    end
    hash
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

end
343
+
@@ -0,0 +1,363 @@
1
+
2
+ require 'ms/fasta'
3
+ require 'arrayclass'
4
+ require 'set'
5
+
6
+ require 'ms/id/peptide'
7
+ require 'ms/id/search'
8
+
9
+ module Ms
10
+ module Sequest
11
# A group of SQT searches.  Most behavior comes from Ms::Id::SearchGroup
# (defined outside this file); this class only says how members are read
# and merges the protein hits of all members after loading.
class SqtGroup
  include Ms::Id::SearchGroup

  #attr_accessor :sqts, :filenames

  # file extension used for the group file
  def extension
    'sqg'
  end

  # the class used to read each member of the group
  def search_class() Ms::Sequest::Sqt end

  # opts are handed on to the SearchGroup initializer, except that
  # :link_protein_hits is always forced to false for the members.  Unless
  # the caller passed :link_protein_hits => false, the peptides of all
  # searches are then merged for the group as a whole.
  # The object is yielded to the given block (if any) when done.
  def initialize(arg, opts={}, &block)
    link_group = (opts[:link_protein_hits] != false)
    member_opts = opts.merge(:link_protein_hits => false)
    super(arg, member_opts) do
      if link_group
        puts "MERGING GROUP!"
        (@peps, @prots) = merge!(@searches.map {|search| search.peps }, &Ms::Sequest::Sqt::NEW_PROT)
      end
    end
    yield(self) if block_given?
  end


  # # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  ## returns the filename used
  ## if the file exists, the name will be expanded to full path, otherwise just
  ## what is given
  #def to_sqg(sqg_filename='bioworks.sqg')
    #File.open(sqg_filename, 'w') do |v|
      #@filenames.each do |sqt_file|
        #if File.exist? sqt_file
          #v.puts File.expand_path(sqt_file)
        #else
          #v.puts sqt_file
        #end
      #end
    #end
    #sqg_filename
  #end

end # SqtGroup
53
+
54
+
55
+ class Sqt
56
+ include Ms::Id::Search
57
+ PercolatorHeaderMatch = /^Percolator v/
58
+ Delimiter = "\t"
59
+ attr_accessor :header
60
+ attr_accessor :spectra
61
+ attr_accessor :base_name
62
+ # boolean
63
+ attr_accessor :percolator_results
64
+
65
# Summarizes a fasta database for the SQT header.
# Assumes the file exists and is readable (nothing here checks for it).
# returns [DBSeqLength, DBLocusCount, DBMD5Sum]
def self.get_db_info(dbfile)
  Ms::Fasta.open(dbfile) {|db| [db.total_sequence_length, db.size, db.md5_sum] }
end
72
+
73
# the class used to represent a protein hit in SQT results
def protein_class() Ms::Sequest::Sqt::Locus end
76
+
77
# Starts with no peptides and no proteins; reads filename right away when
# one is given.  opts are passed straight to from_file:
#     :percolator_results => false | true (default false)
#     :link_protein_hits => true | false (default true)
def initialize(filename=nil, opts={})
  @peps, @prots = [], []
  from_file(filename, opts) if filename
end
87
+
88
# builds a Locus from an existing protein (its locus and description) and
# the given peptide hits; handed to merge! as its block
NEW_PROT = lambda {|prot, pep_hits| Ms::Sequest::Sqt::Locus.new([prot.locus, prot.description, pep_hits]) }
91
+
92
# Reads the header and the spectra/peptide hits from an SQT file.
# if any header key matches PercolatorHeaderMatch (/^Percolator v/) the
# results are interpreted as percolator results regardless of the
# :percolator_results value passed in.
# opts =
#     :percolator_results => false | true (default false)
#     :link_protein_hits => true | false (default true)
def from_file(filename, opts={})
  settings = {:percolator_results=>false, :link_protein_hits => true}.merge(opts)
  @percolator_results = settings[:percolator_results]
  # normalize windows separators so basename works on any platform
  forward_slashed = filename.gsub('\\','/')
  @base_name = File.basename(forward_slashed).sub(/\.\w+$/, '')
  File.open(filename) do |io|
    @header = Ms::Sequest::Sqt::Header.new.from_handle(io)
    @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
    (@spectra, @peps) = Ms::Sequest::Sqt::Spectrum.spectra_from_handle(io, @base_name, @percolator_results)
  end
  if settings[:link_protein_hits]
    (@peps, @prots) = merge!([@peps], &NEW_PROT)
  end
end
110
+
111
+
112
# Header of an SQT file.
# Inherits from hash, so all header stuff can be accessed by key.  Multiline
# values will be pushed into an array.
# All header values are stored as (newline-removed) strings!
class Header < Hash
  # first field of every header line
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  # attribute name => header key
  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }


  # header key => attribute name
  KeysToAtts = HeaderKeys.invert

  HeaderKeys.keys.each do |ky|
    attr_accessor ky
  end

  # Reads consecutive header lines from the handle and leaves the handle
  # positioned at the first line that is not a header line (or at eof).
  # returns self
  def from_handle(fh)
    Arrayed.each {|ky| self[ky] = [] }
    pos = fh.pos
    lines = []
    while (line = fh.gets) && line[0,1] == Leader
      lines << line
      pos = fh.pos
    end
    # un-read the first non-header line
    fh.pos = pos
    from_lines(lines)
  end

  # Fills the hash (and the matching attributes) from header lines of the
  # form "H<tab>Key<tab>value".  The given strings are not modified; lines
  # that carry no key are ignored.
  # returns self
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |line|
      _leader, ky, *rest = line.chomp.split(Ms::Sequest::Sqt::Delimiter)
      next if ky.nil?
      # just in case they have any tabs in their field
      value = rest.join(Ms::Sequest::Sqt::Delimiter)
      if Arrayed.include?(ky)
        # make sure the array is there even when called without from_handle
        (self[ky] ||= []) << value
      elsif self.key? ky  # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else  # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
196
+ end
197
+ end
198
+ end
199
+
200
# One spectrum ('S' line) of an SQT file together with its matches.
# all are cast as expected (total_intensity is a float)
# mh = observed mh
Ms::Sequest::Sqt::Spectrum = Arrayclass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])

# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches

class Ms::Sequest::Sqt::Spectrum
  Leader = 'S'

  # Reads every spectrum, match and locus line from the handle (up to eof).
  # Each 'M' line is attached to the preceding 'S' line and each 'L' line to
  # the preceding 'M' line, so the first line is assumed to start with an 'S'.
  # returns [spectra, peps] where peps is every match in file order
  def self.spectra_from_handle(fh, base_name, percolator_results=false)
    peps = []
    spectra = []
    match_klass =
      if percolator_results
        Ms::Sequest::Sqt::Match::Percolator
      else
        Ms::Sequest::Sqt::Match
      end
    # the spectrum/match currently being filled
    spectrum = nil
    matches = nil
    loci = nil

    while (line = fh.gets)
      case line[0,1]
      when Ms::Sequest::Sqt::Spectrum::Leader
        spectrum = Ms::Sequest::Sqt::Spectrum.new.from_line( line )
        spectra << spectrum
        matches = []
        spectrum.matches = matches
      when Ms::Sequest::Sqt::Match::Leader
        match = match_klass.new.from_line( line )
        # copy first_scan, last_scan and charge over from the spectrum
        match[10,3] = spectrum[0,3]
        match[15] = base_name
        # each match belongs to its spectrum exactly once
        matches << match
        peps << match
        loci = []
        match.loci = loci
      when Ms::Sequest::Sqt::Locus::Leader
        loci << Ms::Sequest::Sqt::Locus.new.from_line( line )
      end
    end
    # set the deltacn:
    set_deltacn(spectra)
    [spectra, peps]
  end

  # Sets the adjusted deltacn of every match: each match takes the
  # deltacn_orig of the match that follows it and the last match of a
  # spectrum gets 1.1.
  # returns the spectra
  def self.set_deltacn(spectra)
    spectra.each do |spec|
      matches = spec.matches
      next if matches.empty?
      matches.each_cons(2) do |match, following|
        match.deltacn = following.deltacn_orig
      end
      matches.last.deltacn = 1.1
    end
    spectra
  end


  # Fills in the spectrum from an 'S' line (the line is chomped in place);
  # matches starts out as an empty array.
  # returns self
  def from_line(line)
    line.chomp!
    ar = line.split(Ms::Sequest::Sqt::Delimiter)
    self[0] = ar[1].to_i
    self[1] = ar[2].to_i
    self[2] = ar[3].to_i
    self[3] = ar[4].to_f
    self[4] = ar[5]
    self[5] = ar[6].to_f
    self[6] = ar[7].to_f
    self[7] = ar[8].to_f
    self[8] = ar[9].to_i
    self[9] = []
    self
  end
end
279
+
280
# One peptide match ('M' line) of an SQT file.
# Sqt format uses only indices 0 - 9
Ms::Sequest::Sqt::Match = Arrayclass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])

# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci

# rxcorr = rank by xcorr
# rsp = rank by sp
# NOTE:
# deltacn_orig
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
# give the last one 1.1)
class Ms::Sequest::Sqt::Match
  Leader = 'M'

  # how fields 1..8 of an 'M' line are cast before landing in slots 0..7
  FIELD_CASTS = [:to_i, :to_i, :to_f, :to_f, :to_f, :to_f, :to_i, :to_i]

  # same as 'loci'
  def prots ; self[16] end

  # Fills slots 0-9 from an 'M' line (the line is chomped in place) and
  # derives aaseq (slot 14) from the sequence.
  # returns self
  def from_line(line)
    line.chomp!
    fields = line.split(Ms::Sequest::Sqt::Delimiter)
    FIELD_CASTS.each_with_index do |cast, slot|
      self[slot] = fields[slot + 1].send(cast)
    end
    # sequence and manual_validation_status stay strings
    self[8] = fields[9]
    self[9] = fields[10]
    self[14] = Ms::Id::Peptide.sequence_to_aaseq(self[8])
    self
  end
end
316
+
317
+
318
# A match read from percolator output: slot 4 (xcorr) holds the percolator
# score and slot 5 (sp) holds the negative q-value.
class Ms::Sequest::Sqt::Match::Percolator < Ms::Sequest::Sqt::Match
  # we will keep access to these old terms since we can then access routines
  # that sort on xcorr...
  #undef_method :xcorr
  #undef_method :xcorr=
  #undef_method :sp
  #undef_method :sp=

  def percolator_score ; self[4] end
  def percolator_score=(score) ; self[4] = score end

  def negative_q_value ; self[5] end
  def negative_q_value=(arg) ; self[5] = arg end

  # the stored value with its sign flipped
  def q_value ; -self[5] end

  # for compatibility with scripts that want this guy
  def probability ; -self[5] end
end
346
+
347
# One protein ('L' line) of an SQT file: its locus, description and peptides
Ms::Sequest::Sqt::Locus = Arrayclass.new(%w[locus description peps])

class Ms::Sequest::Sqt::Locus
  Leader = 'L'

  # both are other names for the locus (slot 0)
  def first_entry
    self[0]
  end

  def reference
    self[0]
  end

  # Fills locus and description from an 'L' line (the line is chomped in
  # place).
  # returns self
  def from_line(line)
    line.chomp!
    _leader, name, descr = line.split(Ms::Sequest::Sqt::Delimiter)
    self[0] = name
    self[1] = descr
    self
  end

end