mspire-sequest 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
@@ -0,0 +1,247 @@
1
+ require 'mspire/ident/pepxml/search_hit/modification_info'
2
+
3
+ module Mspire ; end
4
+ module Mspire::Sequest ; end
5
+ class Mspire::Sequest::Pepxml ; end
6
+
7
+ class Mspire::Sequest::Pepxml::Modifications
8
+ # sequest params object
9
+ attr_accessor :params
10
+ # array holding AAModifications
11
+ attr_accessor :aa_mods
12
+ # array holding TerminalModifications
13
+ attr_accessor :term_mods
14
+ # a hash of all differential modifications present by aa_one_letter_symbol
15
+ # and special_symbol. This is NOT the mass difference but the total mass {
16
+ # 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
17
+ # the amino acid sequence, they are give the *differential* mass. The
18
+ # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
19
+ # cterminus ']' => 14.55 # nterminus
20
+ attr_accessor :aa_mod_to_tot_mass
21
+ # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
22
+ # values are the special_symbols
23
+ attr_accessor :mod_symbols_hash
24
+
25
+ # returns an array of all modifications (aa_mods, then term_mods)
26
+ def modifications
27
+ aa_mods + term_mods
28
+ end
29
+
30
+ # The modification symbols string looks like this:
31
+ # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
32
+ # ct is cterminal peptide (differential)
33
+ # nt is nterminal peptide (differential)
34
+ # the C is just cysteine
35
+ # will set_modifications and aa_mod_to_tot_mass hash
36
+ def initialize(params=nil, modification_symbols_string='')
37
+ @params = params
38
+ if @params
39
+ set_modifications(params, modification_symbols_string)
40
+ end
41
+ end
42
+
43
+ # set the aa_mod_to_tot_mass and mod_symbols_hash from
44
+ def set_hashes(modification_symbols_string)
45
+
46
+ @mod_symbols_hash = {}
47
+ @aa_mod_to_tot_mass = {}
48
+ if (modification_symbols_string == nil || modification_symbols_string == '')
49
+ return nil
50
+ end
51
+ table = @params.mass_index(:precursor)
52
+ modification_symbols_string.split(/\)\s+\(/).each do |mod|
53
+ if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
54
+ if $1 == 'ct' || $1 == 'nt'
55
+ mass_diff = $3.to_f
56
+ @aa_mod_to_tot_mass[$2] = mass_diff
57
+ @mod_symbols_hash[[$1.to_sym, mass_diff]] = $2.dup
58
+ # changed from below to match tests, is this right?
59
+ # @mod_symbols_hash[[$1, mass_diff]] = $2.dup
60
+ else
61
+ symbol_string = $2.dup
62
+ mass_diff = $3.to_f
63
+ $1.split('').each do |aa|
64
+ aa_as_sym = aa.to_sym
65
+ @aa_mod_to_tot_mass[aa+symbol_string] = mass_diff + table[aa_as_sym]
66
+ @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ # returns an array of static mod objects and static terminal mod objects
73
+ def create_static_mods(params)
74
+
75
+ ####################################
76
+ ## static mods
77
+ ####################################
78
+
79
+ static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
80
+ static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
81
+
82
+ params.mods.each do |k,v|
83
+ v_to_f = v.to_f
84
+ if v_to_f != 0.0
85
+ if k =~ /add_(\w)_/
86
+ static_mods << [$1.to_sym, v_to_f]
87
+ else
88
+ static_terminal_mods << [k, v_to_f]
89
+ end
90
+ end
91
+ end
92
+ aa_hash = params.mass_index(:precursor)
93
+
94
+ ## Create the static_mods objects
95
+ static_mods.map! do |mod|
96
+ hash = {
97
+ :aminoacid => mod[0].to_s,
98
+ :massdiff => mod[1],
99
+ :mass => aa_hash[mod[0]] + mod[1],
100
+ :variable => 'N',
101
+ :binary => 'Y',
102
+ }
103
+ Mspire::Ident::Pepxml::AminoacidModification.new(hash)
104
+ end
105
+
106
+ ## Create the static_terminal_mods objects
107
+ static_terminal_mods.map! do |mod|
108
+ terminus = if mod[0] =~ /Cterm/ ; 'c'
109
+ else ; 'n' # only two possible termini
110
+ end
111
+ protein_terminus = case mod[0]
112
+ when /Nterm_protein/ ; 'n'
113
+ when /Cterm_protein/ ; 'c'
114
+ else nil
115
+ end
116
+
117
+ # create the hash
118
+ hash = {
119
+ :terminus => terminus,
120
+ :massdiff => mod[1],
121
+ :variable => 'N',
122
+ :description => mod[0],
123
+ }
124
+ hash[:protein_terminus] = protein_terminus if protein_terminus
125
+ Mspire::Ident::Pepxml::TerminalModification.new(hash)
126
+ end
127
+ [static_mods, static_terminal_mods]
128
+ end
129
+
130
+ # 1. sets aa_mods and term_mods from a sequest params object
131
+ # 2. sets @params
132
+ # 3. sets @aa_mod_to_tot_mass
133
+ def set_modifications(params, modification_symbols_string)
134
+ @params = params
135
+
136
+ set_hashes(modification_symbols_string)
137
+ (static_mods, static_terminal_mods) = create_static_mods(params)
138
+
139
+ aa_hash = params.mass_index(:precursor)
140
+ #################################
141
+ # Variable Mods:
142
+ #################################
143
+ arr = params.diff_search_options.rstrip.split(/\s+/)
144
+ # [aa.to_sym, diff.to_f]
145
+ variable_mods = []
146
+ (0...arr.size).step(2) do |i|
147
+ if arr[i].to_f != 0.0
148
+ variable_mods << [arr[i+1], arr[i].to_f]
149
+ end
150
+ end
151
+ mod_objects = []
152
+ variable_mods.each do |mod|
153
+ mod[0].split('').each do |aa|
154
+ hash = {
155
+
156
+ :aminoacid => aa,
157
+ :massdiff => mod[1],
158
+ :mass => aa_hash[aa.to_sym] + mod[1],
159
+ :variable => 'Y',
160
+ :binary => 'N',
161
+ :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
162
+ }
163
+ mod_objects << Mspire::Ident::Pepxml::AminoacidModification.new(hash)
164
+ end
165
+ end
166
+
167
+ variable_mods = mod_objects
168
+ #################################
169
+ # TERMINAL Variable Mods:
170
+ #################################
171
+ # These are always peptide, not protein termini (for sequest)
172
+ (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
173
+
174
+ to_add = []
175
+ if nterm_diff != 0.0
176
+ to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
177
+ end
178
+ if cterm_diff != 0.0
179
+ to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
180
+ end
181
+
182
+ variable_terminal_mods = to_add.map do |term, mssdiff, symb|
183
+ hash = {
184
+ :terminus => term,
185
+ :massdiff => mssdiff,
186
+ :variable => 'Y',
187
+ :symbol => symb,
188
+ }
189
+ Mspire::Ident::Pepxml::TerminalModification.new(hash)
190
+ end
191
+
192
+ #########################
193
+ # COLLECT THEM
194
+ #########################
195
+ @aa_mods = static_mods + variable_mods
196
+ @term_mods = static_terminal_mods + variable_terminal_mods
197
+ end
198
+
199
+ # takes a peptide sequence with modifications but no preceding or trailing
200
+ # amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
201
+ # returns a ModificationInfo object
202
+ # if there are no modifications, returns nil
203
+ def modification_info(mod_peptide)
204
+ return nil if @aa_mod_to_tot_mass.size == 0
205
+ mod_info = Mspire::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
206
+ mass_table = @params.mass_index(:precursor)
207
+
208
+ # TERMINI:
209
+ ## only the termini can match a single char
210
+ if @aa_mod_to_tot_mass.key? mod_peptide[0,1]
211
+ # AA + H + differential_mod
212
+ mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[mod_peptide[0,1]]
213
+ mod_peptide = mod_peptide[1...(mod_peptide.size)]
214
+ end
215
+ if @aa_mod_to_tot_mass.key? mod_peptide[(mod_peptide.size-1),1]
216
+ # AA + OH + differential_mod
217
+ mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[mod_peptide[-1,1]]
218
+ mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
219
+ end
220
+
221
+ # OTHER DIFFERENTIAL MODS:
222
+ mod_array = []
223
+ mod_cnt = 1
224
+ bare_cnt = 1
225
+ last_normal_aa = mod_peptide[0,1]
226
+ (1...mod_peptide.size).each do |i|
227
+ if @aa_mod_to_tot_mass.key?( last_normal_aa + mod_peptide[i,1] )
228
+ # we don't save the result because most amino acids will not be
229
+ # modified
230
+ mod_array << Mspire::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(bare_cnt, @aa_mod_to_tot_mass[last_normal_aa + mod_peptide[i,1]])
231
+ else
232
+ last_normal_aa = mod_peptide[i,1]
233
+ bare_cnt += 1
234
+ end
235
+ mod_cnt += 1
236
+ end
237
+ if mod_cnt == bare_cnt
238
+ nil
239
+ else
240
+ mod_info.mod_aminoacid_masses = mod_array if mod_array.size > 0
241
+ mod_info
242
+ end
243
+ end
244
+
245
+
246
+ end
247
+
@@ -0,0 +1,32 @@
1
+
2
module Mspire ; end
module Mspire::Sequest ; end

# pepxml-specific additions to the sequest params object
class Mspire::Sequest::Params

  # returns a Mspire::Ident::Pepxml::SampleEnzyme object built from
  # sample_enzyme_hash
  def sample_enzyme
    enzyme_attributes = sample_enzyme_hash
    Mspire::Ident::Pepxml::SampleEnzyme.new(enzyme_attributes)
  end

  # returns a hash suitable for setting a Mspire::Ident::Pepxml::SampleEnzyme
  # object.  Keys: :name, :cut, :no_cut, :sense.  Empty-string members of
  # enzyme_specificity become nil.  :sense is nil for "No_Enzyme" (and for
  # any offset other than 0 or 1), 'C' for offset 1 and 'N' for offset 0.
  def sample_enzyme_hash
    offset, cleave_at, except_if_after = enzyme_specificity.map {|v| v == '' ? nil : v }
    enzyme_name = self.enzyme

    sense = nil
    unless enzyme_name == "No_Enzyme"
      if offset == 1
        sense = 'C'
      elsif offset == 0
        sense = 'N'
      end
    end

    {
      :name => enzyme_name,
      :cut => cleave_at,
      :no_cut => except_if_after,
      :sense => sense,
    }
  end

end
@@ -0,0 +1,393 @@
1
+ require 'set'
2
+
3
+ require 'mspire/fasta'
4
+ require 'digest/md5'
5
+
6
+ require 'mspire/ident/peptide'
7
+ require 'mspire/ident/search'
8
+
9
+ module Mspire
10
+ module Sequest
11
# A group of Sqt searches.  The generic group behavior comes from
# Mspire::Ident::SearchGroup; this class supplies the per-file search class,
# the group file extension, and merging of the protein hits across files.
class SqtGroup
  include Mspire::Ident::SearchGroup

  #attr_accessor :sqts, :filenames

  # the class used to read each individual file of the group
  def search_class
    Mspire::Sequest::Sqt
  end

  # file extension for a group file
  def extension
    'sqg'
  end

  # Individual searches are always created with
  # :link_protein_hits => false; unless the caller passed
  # :link_protein_hits => false, peptides and proteins are merged across
  # all searches inside the block handed to super.  The optional block
  # receives the finished group.
  def initialize(arg, opts={}, &block)
    merge_requested = !(opts[:link_protein_hits] == false)
    super(arg, opts.merge(:link_protein_hits => false)) do
      if merge_requested
        puts "MERGING GROUP!"
        (@peptides, @proteins) = merge!(@searches.map {|v| v.peptides }, &Mspire::Sequest::Sqt::NEW_PROT)
      end
    end
    block.call(self) if block_given?
  end


  # # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  ## returns the filename used
  ## if the file exists, the name will be expanded to full path, otherwise just
  ## what is given
  #def to_sqg(sqg_filename='bioworks.sqg')
    #File.open(sqg_filename, 'w') do |v|
      #@filenames.each do |sqt_file|
        #if File.exist? sqt_file
          #v.puts File.expand_path(sqt_file)
        #else
          #v.puts sqt_file
        #end
      #end
    #end
    #sqg_filename
  #end

end # SqtGroup
53
+
54
+
55
+ class Sqt
56
+ include Mspire::Ident::SearchLike
57
+ PercolatorHeaderMatch = /^Percolator v/
58
+ Delimiter = "\t"
59
+ attr_accessor :header
60
+ attr_accessor :spectra
61
+ attr_accessor :base_name
62
+ # boolean
63
+ attr_accessor :percolator_results
64
+
65
# Walks every entry of the fasta file once.
# returns [sequence_length, locus_count]: the summed size of all entry
# sequences and the number of entries
def self.db_seq_length_and_locus_count(dbfile)
  totals = [0, 0]   # [total residues, number of entries]
  Mspire::Fasta.open(dbfile) do |fasta|
    fasta.each do |entry|
      totals[0] += entry.sequence.size
      totals[1] += 1
    end
  end
  totals
end
77
+
78
#--
# this is implemented separate from sequence length because seq length
# uses Archive which doesn't preserve carriage returns and newlines.
#++
# Returns the MD5 hex digest of the raw bytes of dbfile.  Digest::MD5.file
# reads the file in binary mode, so carriage returns and newlines are
# hashed exactly as stored on disk on every platform (a text-mode read may
# translate line endings on Windows).
# Raises the usual Errno exception if the file cannot be read.
def self.db_md5sum(dbfile)
  Digest::MD5.file(dbfile).hexdigest
end
92
+
93
# assumes the file exists and is readable
# returns the 3 member array [DBSeqLength, DBLocusCount, DBMD5Sum]
def self.db_info(dbfile)
  seq_length, locus_count = db_seq_length_and_locus_count(dbfile)
  [seq_length, locus_count, db_md5sum(dbfile)]
end
99
+
100
# the class used to represent a protein hit of an sqt file (a Locus)
def protein_class
  locus_class = Mspire::Sequest::Sqt::Locus
  locus_class
end
103
+
104
# Reads filename with from_file when one is given; otherwise the object is
# left empty.
#
# opts =
#     :percolator_results => false | true (default false)
#     :link_protein_hits => true | false (default true)
def initialize(filename=nil, opts={})
  # (a local `peptide_hits = []` used to be assigned here; it was never
  # read and has been removed)
  from_file(filename, opts) if filename
end
113
+
114
# Builds a fresh Locus from an existing protein (its locus and description)
# and a list of peptides; handed as the block to merge! by SqtGroup.
NEW_PROT = ->(prot, peptide_list) {
  Mspire::Sequest::Sqt::Locus.new(prot.locus, prot.description, peptide_list)
}
117
+
118
# Reads the header and then all spectra/matches of an sqt file, setting
# @percolator_results, @base_name (file name without directory and
# extension; backslashes are treated as path separators), @header,
# @spectra and @peptides.
#
# if the file contains the header key '/$Percolator v/' then the results
# will be interpreted as percolator results regardless of the value
# passed in.
#
# NOTE(review): :link_protein_hits is given a default here but is not
# otherwise used inside this method.
def from_file(filename, opts={})
  defaults = {:percolator_results=>false, :link_protein_hits => true}
  opts = defaults.merge(opts)
  @percolator_results = opts[:percolator_results]

  unix_style_name = filename.gsub('\\','/')
  @base_name = File.basename(unix_style_name).sub(/\.\w+$/, '')

  File.open(filename) do |fh|
    @header = Mspire::Sequest::Sqt::Header.new.from_handle(fh)
    percolator_header = @header.keys.any? {|header_key| header_key =~ PercolatorHeaderMatch }
    @percolator_results = true if percolator_header
    (@spectra, @peptides) = Mspire::Sequest::Sqt::Spectrum.spectra_from_handle(fh, @base_name, @percolator_results)
  end
end
133
+
134
+
135
# Inherits from hash, so all header stuff can be accessed by key. Multiline
# values will be pushed into an array.
# All header values are stored as (newline-removed) strings!
# Every key listed in HeaderKeys is additionally exposed through an
# attribute (e.g. 'Database' => #database); the attribute is nil when the
# key never appeared.
class Header < Hash
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }


  # header key string => attribute name (symbol)
  KeysToAtts = HeaderKeys.invert

  attr_accessor(*HeaderKeys.keys)

  # Reads consecutive lines starting with Leader from the io object and
  # hands them to from_lines.  The handle is left positioned at the first
  # line that is not a header line (or at end of file).  Returns self.
  def from_handle(fh)
    Arrayed.each {|ky| self[ky] = [] }
    lines = []
    loop do
      pos = fh.pos  # remember where this line starts so we can rewind
      line = fh.gets
      unless line && line[0,1] == Leader
        # reset the fh.pos and we're done
        fh.pos = pos
        break
      end
      lines << line
    end
    from_lines(lines)
  end

  # Fills the hash (and the attributes) from header lines of the form
  # "H<tab>Key<tab>value".  The given strings are not modified.  Lines
  # without a key field are ignored.  Returns self.
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |raw_line|
      (ky, *rest) = raw_line.chomp.split(Mspire::Sequest::Sqt::Delimiter)[1..-1]
      next if ky.nil?
      # just in case they have any tabs in their field
      value = rest.join(Mspire::Sequest::Sqt::Delimiter)
      if Arrayed.include?(ky)
        # also works when from_handle has not prepared the arrays
        (self[ky] ||= []) << value
      elsif self.key? ky # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
219
+ end
220
+ end
221
+ end
222
+
223
+ # all are cast as expected (total_intensity is a float)
224
+ # mh = observed mh
225
+ Mspire::Sequest::Sqt::Spectrum = Struct.new(* %w(first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches).map(&:to_sym) )
226
+
227
+ # 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
228
+
229
+ class Mspire::Sequest::Sqt::Spectrum
230
+ Leader = 'S'
231
+
232
# Reads the spectrum ('S'), match ('M') and locus ('L') lines from the io
# object until end of file.  Lines with any other leader are ignored.
# Each match receives first_scan, last_scan and charge from its spectrum
# and base_name; each locus is attached to the match preceding it.
# assumes the first line starts with an 'S'
# returns [spectra, peptides] (peptides is the flat list of all matches);
# deltacn is set on all matches via set_deltacn before returning.
def self.spectra_from_handle(fh, base_name, percolator_results=false)
  peptides = []
  spectra = []
  spectrum = nil  # the spectrum currently being filled
  matches = nil   # matches of the current spectrum
  loci = nil      # loci of the current match

  while line = fh.gets
    case line[0,1]
    when Mspire::Sequest::Sqt::Spectrum::Leader
      spectrum = Mspire::Sequest::Sqt::Spectrum.new.from_line( line )
      spectra << spectrum
      matches = []
      spectrum.matches = matches
    when Mspire::Sequest::Sqt::Match::Leader
      match_klass =
        if percolator_results
          Mspire::Sequest::Sqt::Match::Percolator
        else
          Mspire::Sequest::Sqt::Match
        end
      match = match_klass.new.from_line( line )
      # structs cannot set multiple values at a time :(
      match[10] = spectrum[0]  # first_scan
      match[11] = spectrum[1]  # last_scan
      match[12] = spectrum[2]  # charge
      match[15] = base_name
      loci = []
      match.loci = loci
      # each match is recorded exactly once per spectrum
      matches << match
      peptides << match
    when Mspire::Sequest::Sqt::Locus::Leader
      line.chomp!
      loci << Mspire::Sequest::Sqt::Locus.from_line( line )
    end
  end
  # set the deltacn:
  set_deltacn(spectra)
  [spectra, peptides]
end
273
+
274
# Sets the adjusted deltacn of every match: each match takes the
# deltacn_orig of the match following it in its spectrum, and the last
# match of a spectrum gets 1.1.  Spectra without matches are left alone.
# returns the spectra array it was given
def self.set_deltacn(spectra)
  spectra.each do |spec|
    hits = spec.matches
    next unless hits.size > 0

    hits.each_cons(2) do |current, following|
      current.deltacn = following.deltacn_orig
    end
    hits[-1].deltacn = 1.1
  end
  spectra
end
287
+
288
+
289
# Fills this spectrum from a tab delimited 'S' line (the line is chomped in
# place).  Field 0 of the line is the leader; the following fields are cast:
# first_scan, last_scan, charge and num_matched_peptides to integers,
# time_to_process, mh, total_intensity and lowest_sp to floats; node is
# kept as a string.  matches is reset to an empty array.
# returns self
def from_line(line)
  line.chomp!
  fields = line.split(Mspire::Sequest::Sqt::Delimiter)
  self[0], self[1], self[2], self[8] = fields.values_at(1, 2, 3, 9).map {|v| v.to_i }
  self[3], self[5], self[6], self[7] = fields.values_at(4, 6, 7, 8).map {|v| v.to_f }
  self[4] = fields[5]
  self[9] = []
  self
end
305
+ end
306
+
307
+ # Sqt format uses only indices 0 - 9
308
+ Mspire::Sequest::Sqt::Match = Struct.new( *%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci].map(&:to_sym) )
309
+
310
+ # 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
311
+
312
+ # rxcorr = rank by xcorr
313
+ # rsp = rank by sp
314
+ # NOTE:
315
+ # deltacn_orig
316
+ # deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
317
+ # give the last one 1.1)
318
+ class Mspire::Sequest::Sqt::Match
319
+ Leader = 'M'
320
+
321
# same as 'loci' (member 16 of the struct)
def proteins
  loci
end
325
+
326
# Fills members 0-9 of this match from a tab delimited 'M' line (the line
# is chomped in place; field 0 of the line is the leader).  rxcorr, rsp,
# ions_matched and ions_total are cast to integers; mh, deltacn_orig,
# xcorr and sp to floats; sequence and manual_validation_status stay
# strings.  aaseq is derived from the sequence with
# Mspire::Ident::Peptide.sequence_to_aaseq.
# returns self
def from_line(line)
  line.chomp!
  fields = line.split(Mspire::Sequest::Sqt::Delimiter)
  self[0], self[1], self[6], self[7] = fields.values_at(1, 2, 7, 8).map {|v| v.to_i }
  self[2], self[3], self[4], self[5] = fields.values_at(3, 4, 5, 6).map {|v| v.to_f }
  self[8], self[9] = fields.values_at(9, 10)
  self[14] = Mspire::Ident::Peptide.sequence_to_aaseq(self[8])
  self
end
342
+ end
343
+
344
+
345
# A match from an sqt file that holds percolator results.  The percolator
# score is stored in the xcorr slot (member 4) and the negative q-value in
# the sp slot (member 5).
class Mspire::Sequest::Sqt::Match::Percolator < Mspire::Sequest::Sqt::Match
  # we will keep access to these old terms since we can then access routines
  # that sort on xcorr...
  #undef_method :xcorr
  #undef_method :xcorr=
  #undef_method :sp
  #undef_method :sp=

  # the value stored in the xcorr slot
  def percolator_score
    xcorr
  end

  def percolator_score=(score)
    self.xcorr = score
  end

  # the value stored in the sp slot
  def negative_q_value
    sp
  end

  def negative_q_value=(arg)
    self.sp = arg
  end

  # the sign-flipped negative_q_value
  def q_value
    -sp
  end

  # for compatibility with scripts that want this guy (same as q_value)
  def probability
    -sp
  end
end
373
+
374
+ Mspire::Sequest::Sqt::Locus = Struct.new( :locus, :description )
375
+
376
+ class Mspire::Sequest::Sqt::Locus
377
+ Leader = 'L'
378
+
379
# first_entry, reference and id are all alternative names for the locus
# (member 0 of the struct)
def first_entry
  locus
end

def reference
  locus
end

def id
  locus
end
382
+
383
# Stores locus and description in the struct.  A third argument (peptides)
# is accepted, so NEW_PROT-style callers can pass a peptide list, but it is
# not stored.
def initialize(locus=nil, description=nil, peptides=[])
  super(locus, description)
end
386
+
387
# Builds a Locus from a tab delimited 'L' line ("L<tab>locus<tab>description",
# the description being optional).  The line is chomped in place.  The
# leading 'L' field is skipped so that locus and description land in the
# right members (passing the leader along would shift every field by one).
# returns a new Locus object
def self.from_line(line)
  line.chomp!
  fields = line.split(Mspire::Sequest::Sqt::Delimiter)
  self.new( *(fields[1, 2] || []) ) # fills in the first two values
end
392
+
393
+ end
@@ -0,0 +1,21 @@
1
module Mspire ; end
module Mspire::Ident ; end

class Mspire::Ident::Pepxml
  class SearchHit
    # The sequest specific search scores of a search hit.
    Sequest = Struct.new(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank) do

      # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
      def self.split_ions(ions)
        ions.split("/").map(&:to_i)
      end

      # Emits one search_score element per member (name is the member
      # symbol, value is the member's value) on the given builder.
      # returns nil
      def to_xml(builder)
        each_pair do |score_name, score_value|
          builder.search_score(:name => score_name, :value => score_value)
        end
        nil
      end
    end
  end
end
21
+