mspire-sequest 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
@@ -0,0 +1,247 @@
1
+ require 'mspire/ident/pepxml/search_hit/modification_info'
2
+
3
+ module Mspire ; end
4
+ module Mspire::Sequest ; end
5
+ class Mspire::Sequest::Pepxml ; end
6
+
7
+ class Mspire::Sequest::Pepxml::Modifications
8
+ # sequest params object
9
+ attr_accessor :params
10
+ # array holding AAModifications
11
+ attr_accessor :aa_mods
12
+ # array holding TerminalModifications
13
+ attr_accessor :term_mods
14
+ # a hash of all differential modifications present by aa_one_letter_symbol
15
+ # and special_symbol. This is NOT the mass difference but the total mass {
16
+ # 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
17
+ # the amino acid sequence, they are given the *differential* mass. The
18
+ # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
19
+ # cterminus ']' => 14.55 # nterminus
20
+ attr_accessor :aa_mod_to_tot_mass
21
+ # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
22
+ # values are the special_symbols
23
+ attr_accessor :mod_symbols_hash
24
+
25
+ # returns an array of all modifications (aa_mods, then term_mods)
26
# Concatenates the residue-level modifications with the terminal ones,
# residue-level first. Returns a new array; neither list is altered.
def modifications
  residue_level = aa_mods
  residue_level + term_mods
end
29
+
30
+ # The modification symbols string looks like this:
31
+ # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
32
+ # ct is cterminal peptide (differential)
33
+ # nt is nterminal peptide (differential)
34
+ # the C is just cysteine
35
+ # will set_modifications and aa_mod_to_tot_mass hash
36
# Stores the params object and, when one was supplied, immediately derives
# the modification lists and lookup hashes via set_modifications.
# With no params nothing else is initialised.
def initialize(params=nil, modification_symbols_string='')
  @params = params
  set_modifications(params, modification_symbols_string) if @params
end
42
+
43
+ # sets @aa_mod_to_tot_mass and @mod_symbols_hash from the modification symbols string
44
# Resets @mod_symbols_hash and @aa_mod_to_tot_mass, then fills them by
# parsing a string such as "(M* +15.90000) (ct[ +12.33000)".
#
# * 'ct'/'nt' entries: the bare symbol maps to the differential mass, and
#   [:ct | :nt, diff] maps to the symbol.
# * any other entry: every residue letter before the symbol gets
#   "<aa><symbol>" => diff + precursor mass of the residue, and
#   [aa_sym, diff] => symbol.
#
# Returns nil (leaving both hashes empty) for a nil or empty string.
def set_hashes(modification_symbols_string)
  @mod_symbols_hash = {}
  @aa_mod_to_tot_mass = {}
  return nil if modification_symbols_string.nil? || modification_symbols_string == ''

  residue_masses = @params.mass_index(:precursor)
  modification_symbols_string.split(/\)\s+\(/).each do |entry|
    md = /\(?(\w+)(.) (.[\d\.]+)\)?/.match(entry)
    next unless md

    target = md[1]
    symbol = md[2]
    delta = md[3].to_f
    case target
    when 'ct', 'nt'
      @aa_mod_to_tot_mass[symbol] = delta
      @mod_symbols_hash[[target.to_sym, delta]] = symbol
    else
      target.split('').each do |aa|
        aa_sym = aa.to_sym
        @aa_mod_to_tot_mass[aa + symbol] = delta + residue_masses[aa_sym]
        @mod_symbols_hash[[aa_sym, delta]] = symbol
      end
    end
  end
end
72
+ # returns an array of static mod objects and static terminal mod objects
73
# Builds the static (non-variable) modification objects from params.mods.
#
# Entries whose value converts to 0.0 are ignored. Keys matching
# /add_(\w)_/ are treated as single-residue mods; every other key is
# treated as a terminal mod ('c' when the key contains "Cterm", else 'n').
#
# Returns [aminoacid_modifications, terminal_modifications].
def create_static_mods(params)
  residue_pairs = []   # [[aa_symbol, amount], ...]
  terminal_pairs = []  # [[params_key, amount], ...]

  params.mods.each do |name, raw_amount|
    amount = raw_amount.to_f
    next if amount == 0.0
    if name =~ /add_(\w)_/
      residue_pairs << [$1.to_sym, amount]
    else
      terminal_pairs << [name, amount]
    end
  end
  residue_masses = params.mass_index(:precursor)

  aa_objects = residue_pairs.map do |aa, amount|
    attrs = {
      :aminoacid => aa.to_s,
      :massdiff => amount,
      :mass => residue_masses[aa] + amount,
      :variable => 'N',
      :binary => 'Y',
    }
    Mspire::Ident::Pepxml::AminoacidModification.new(attrs)
  end

  terminal_objects = terminal_pairs.map do |name, amount|
    attrs = {
      :terminus => (name =~ /Cterm/ ? 'c' : 'n'), # only two possible termini
      :massdiff => amount,
      :variable => 'N',
      :description => name,
    }
    protein_end =
      if name =~ /Nterm_protein/ then 'n'
      elsif name =~ /Cterm_protein/ then 'c'
      end
    attrs[:protein_terminus] = protein_end if protein_end
    Mspire::Ident::Pepxml::TerminalModification.new(attrs)
  end

  [aa_objects, terminal_objects]
end
129
+
130
+ # 1. sets aa_mods and term_mods from a sequest params object
131
+ # 2. sets @params
132
+ # 3. sets @aa_mod_to_tot_mass
133
# Populates @params, the lookup hashes (via set_hashes), @aa_mods and
# @term_mods from a sequest params object.
#
# params                      - object answering mass_index(:precursor),
#                               diff_search_options and
#                               term_diff_search_options (plus whatever
#                               create_static_mods needs)
# modification_symbols_string - passed straight to set_hashes
#
# Static mods come first in each list, followed by the variable ones.
def set_modifications(params, modification_symbols_string)
  @params = params

  set_hashes(modification_symbols_string)
  static_mods, static_terminal_mods = create_static_mods(params)

  aa_hash = params.mass_index(:precursor)

  #################################
  # Variable Mods:
  #################################
  # diff_search_options is read as alternating "<mass_diff> <residues>"
  # tokens; pairs with a zero diff (or a dangling diff with no residues)
  # are skipped.
  variable_mods = []
  params.diff_search_options.rstrip.split(/\s+/).each_slice(2) do |raw_diff, residues|
    diff = raw_diff.to_f
    next if diff == 0.0 || residues.nil?
    residues.split('').each do |aa|
      hash = {
        :aminoacid => aa,
        :massdiff => diff,
        :mass => aa_hash[aa.to_sym] + diff,
        :variable => 'Y',
        :binary => 'N',
        :symbol => @mod_symbols_hash[[aa.to_sym, diff]],
      }
      variable_mods << Mspire::Ident::Pepxml::AminoacidModification.new(hash)
    end
  end

  #################################
  # TERMINAL Variable Mods:
  #################################
  # These are always peptide, not protein termini (for sequest)
  nterm_diff, cterm_diff = params.term_diff_search_options.rstrip.split(/\s+/).map {|v| v.to_f }

  # @mod_symbols_hash is keyed by a two-element array ([:nt, diff]); the
  # key must therefore be passed as ONE array argument to Hash#[].
  candidates = [['n', :nt, nterm_diff], ['c', :ct, cterm_diff]]
  present = candidates.select {|_term, _key, diff| diff && diff != 0.0 }
  variable_terminal_mods = present.map do |term, key, diff|
    hash = {
      :terminus => term,
      :massdiff => diff.to_plus_minus_string,
      :variable => 'Y',
      :symbol => @mod_symbols_hash[[key, diff]],
    }
    Mspire::Ident::Pepxml::TerminalModification.new(hash)
  end

  #########################
  # COLLECT THEM
  #########################
  @aa_mods = static_mods + variable_mods
  @term_mods = static_terminal_mods + variable_terminal_mods
end
198
+
199
+ # takes a peptide sequence with modifications but no preceding or trailing
200
+ # amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
201
+ # returns a ModificationInfo object
202
+ # if there are no modifications, returns nil
203
# Builds a ModificationInfo for a peptide string that carries sequest
# modification symbols (e.g. "]PEPT*IDE").
#
# mod_peptide - peptide sequence with symbols but without flanking residues
#
# Returns nil when no differential modifications are configured or when the
# peptide carries none; otherwise returns the ModificationInfo. A peptide
# that is modified only at a terminus also yields a ModificationInfo (the
# terminal mass would otherwise be computed and then discarded).
def modification_info(mod_peptide)
  return nil if @aa_mod_to_tot_mass.size == 0
  mod_info = Mspire::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
  mass_table = @params.mass_index(:precursor)
  terminal_mod_found = false

  # TERMINI:
  ## only the termini can match a single char
  nterm_symbol = mod_peptide[0,1]
  if @aa_mod_to_tot_mass.key? nterm_symbol
    # AA + H + differential_mod
    mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[nterm_symbol]
    mod_peptide = mod_peptide[1...(mod_peptide.size)]
    terminal_mod_found = true
  end
  cterm_symbol = mod_peptide[-1,1]
  if @aa_mod_to_tot_mass.key? cterm_symbol
    # AA + OH + differential_mod
    mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[cterm_symbol]
    mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
    terminal_mod_found = true
  end

  # OTHER DIFFERENTIAL MODS:
  # Walk the string keeping the last unmodified residue; a character that
  # forms a known "<aa><symbol>" key with it is a modification symbol and
  # does not advance the 1-based residue position.
  mod_array = []
  position = 1
  last_normal_aa = mod_peptide[0,1]
  (1...mod_peptide.size).each do |i|
    key = last_normal_aa + mod_peptide[i,1]
    if @aa_mod_to_tot_mass.key?(key)
      mod_array << Mspire::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, @aa_mod_to_tot_mass[key])
    else
      last_normal_aa = mod_peptide[i,1]
      position += 1
    end
  end

  return nil if mod_array.empty? && !terminal_mod_found
  mod_info.mod_aminoacid_masses = mod_array unless mod_array.empty?
  mod_info
end
244
+
245
+
246
+ end
247
+
@@ -0,0 +1,32 @@
1
+
2
module Mspire ; end
module Mspire::Sequest ; end

# pepxml-related additions to the sequest params object.
class Mspire::Sequest::Params

  # Wraps sample_enzyme_hash in a Mspire::Ident::Pepxml::SampleEnzyme.
  def sample_enzyme
    enzyme_attributes = sample_enzyme_hash
    Mspire::Ident::Pepxml::SampleEnzyme.new(enzyme_attributes)
  end

  # Returns a hash with :name, :cut, :no_cut and :sense, built from
  # enzyme_specificity (offset, cleave_at, except_if_after) and enzyme.
  # Empty-string specificity values become nil. :sense is nil for
  # "No_Enzyme", 'C' for offset 1, 'N' for offset 0, and nil otherwise.
  def sample_enzyme_hash
    offset, cleave_at, except_if_after = enzyme_specificity.map {|v| v == '' ? nil : v }
    enzyme_name = self.enzyme
    sense =
      if enzyme_name == "No_Enzyme" then nil
      elsif offset == 1 then 'C'
      elsif offset == 0 then 'N'
      end
    {
      :name => enzyme_name,
      :cut => cleave_at,
      :no_cut => except_if_after,
      :sense => sense,
    }
  end

end
@@ -0,0 +1,393 @@
1
+ require 'set'
2
+
3
+ require 'mspire/fasta'
4
+ require 'digest/md5'
5
+
6
+ require 'mspire/ident/peptide'
7
+ require 'mspire/ident/search'
8
+
9
+ module Mspire
10
+ module Sequest
11
# A group of Sqt searches, built on Mspire::Ident::SearchGroup.
class SqtGroup
  include Mspire::Ident::SearchGroup

  #attr_accessor :sqts, :filenames

  # the class used for each individual search in the group
  def search_class
    Mspire::Sequest::Sqt
  end

  # file extension used for a group file
  def extension
    'sqg'
  end

  # Delegates to SearchGroup#initialize with :link_protein_hits forced to
  # false for the individual searches. Unless the caller passed
  # :link_protein_hits => false, the block handed to super then merges the
  # peptides of all searches into @peptides / @proteins. Finally calls the
  # caller's block (if any) with self.
  def initialize(arg, opts={}, &block)
    requested_opts = opts.dup
    per_search_opts = opts.merge(:link_protein_hits => false)
    super(arg, per_search_opts) do
      if requested_opts[:link_protein_hits] != false
        puts "MERGING GROUP!"
        all_peptides = @searches.map {|search| search.peptides }
        (@peptides, @proteins) = merge!(all_peptides, &Mspire::Sequest::Sqt::NEW_PROT)
      end
    end
    block.call(self) if block_given?
  end


  # # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  ## returns the filename used
  ## if the file exists, the name will be expanded to full path, otherwise just
  ## what is given
  #def to_sqg(sqg_filename='bioworks.sqg')
  #File.open(sqg_filename, 'w') do |v|
  #@filenames.each do |sqt_file|
  #if File.exist? sqt_file
  #v.puts File.expand_path(sqt_file)
  #else
  #v.puts sqt_file
  #end
  #end
  #end
  #sqg_filename
  #end

end # SqtGroup
53
+
54
+
55
+ class Sqt
56
+ include Mspire::Ident::SearchLike
57
+ PercolatorHeaderMatch = /^Percolator v/
58
+ Delimiter = "\t"
59
+ attr_accessor :header
60
+ attr_accessor :spectra
61
+ attr_accessor :base_name
62
+ # boolean
63
+ attr_accessor :percolator_results
64
+
65
+ # returns [sequence_length, locus_count] of the fasta file
66
# Iterates every entry of the fasta file, summing sequence sizes and
# counting entries. Returns [total_sequence_length, number_of_entries].
def self.db_seq_length_and_locus_count(dbfile)
  residue_total = 0
  entry_total = 0
  Mspire::Fasta.open(dbfile) do |fasta|
    fasta.each do |record|
      entry_total += 1
      residue_total += record.sequence.size
    end
  end
  [residue_total, entry_total]
end
77
+
78
+ #--
79
+ # this is implemented separate from sequence length because seq length
80
+ # uses Archive which doesn't preserve carriage returns and newlines.
81
+ #++
82
# Returns the hex MD5 digest of the file's raw bytes.
#
# Digest::MD5.file reads the file in binary mode, so carriage returns and
# newlines are hashed exactly as stored on disk on every platform (a
# text-mode read can translate line endings and change the digest).
def self.db_md5sum(dbfile)
  Digest::MD5.file(dbfile).hexdigest
end
92
+
93
+ # assumes the file exists and is readable
94
+ # returns [DBSeqLength, DBLocusCount, DBMD5Sum]
95
# Returns the array from db_seq_length_and_locus_count with the file's
# md5 hex digest appended as the last element.
def self.db_info(dbfile)
  info = self.db_seq_length_and_locus_count(dbfile)
  info << self.db_md5sum(dbfile)
  info
end
99
+
100
# The class used to represent protein hits for this search type.
def protein_class
  locus_class = Mspire::Sequest::Sqt::Locus
  locus_class
end
103
+
104
+ # opts =
105
+ # :percolator_results => false | true (default false)
106
+ # :link_protein_hits => true | false (default true)
107
# Creates a Sqt object; when a filename is given the file is read
# immediately through from_file with the supplied opts. Without a filename
# nothing is loaded.
# (The former local `peptide_hits = []` was never read and has been dropped.)
def initialize(filename=nil, opts={})
  from_file(filename, opts) if filename
end
113
+
114
# Lambda that builds a fresh Locus from an existing protein-like object
# (anything answering locus and description) and a peptide list.
NEW_PROT = lambda { |source_protein, peptide_list|
  Mspire::Sequest::Sqt::Locus.new(source_protein.locus, source_protein.description, peptide_list)
}
117
+
118
+ # if the file contains a header key matching /^Percolator v/ then the results
119
+ # will be interpreted as percolator results regardless of the value
120
+ # passed in.
121
# Reads an sqt file: sets @percolator_results, @base_name (file name
# without directory or final extension; backslashes are treated as path
# separators), @header, @spectra and @peptides.
# @percolator_results is forced to true when any header key matches
# PercolatorHeaderMatch, whatever opts says.
def from_file(filename, opts={})
  defaults = {:percolator_results=>false, :link_protein_hits => true}
  opts = defaults.merge(opts)
  @percolator_results = opts[:percolator_results]

  normalized_path = filename.gsub('\\','/')
  @base_name = File.basename(normalized_path).sub(/\.\w+$/, '')

  File.open(filename) do |fh|
    @header = Mspire::Sequest::Sqt::Header.new.from_handle(fh)
    @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
    (@spectra, @peptides) = Mspire::Sequest::Sqt::Spectrum.spectra_from_handle(fh, @base_name, @percolator_results)
  end
end
133
+
134
+
135
+ # Inherits from hash, so all header stuff can be accessed by key. Multiline
136
+ # values will be pushed into an array.
137
+ # All header values are stored as (newline-removed) strings!
138
# Hash of sqt header values keyed by the header key string. Each known key
# (see HeaderKeys) is also mirrored into an attribute by from_lines.
# All values are stored as newline-stripped strings, or arrays of them.
class Header < Hash
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  # (the key was previously misspelled "DyanmicMod", so a single DynamicMod
  # line ended up as a String instead of a one-element Array)
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }


  KeysToAtts = HeaderKeys.invert

  HeaderKeys.keys.each do |ky|
    attr_accessor ky
  end

  # Reads consecutive lines starting with Leader from fh, leaving fh
  # positioned at the first non-header line (or EOF), then parses them with
  # from_lines. The Arrayed keys are pre-set to empty arrays so they exist
  # even when the file has no such lines. Returns self.
  def from_handle(fh)
    Arrayed.each do |ky|
      self[ky] = []
    end
    pos = fh.pos
    lines = []
    loop do
      line = fh.gets
      if line && (line[0,1] == Leader)
        lines << line
      else # reset the fh.pos and we're done
        fh.pos = pos
        break
      end
      pos = fh.pos
    end
    from_lines(lines)
  end

  # Parses an array of header lines ("H<tab>Key<tab>value..."). The lines
  # are chomped in place. Sets the hash entries, then copies every known
  # key into its attribute. Returns self.
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |line|
      line.chomp!
      (ky, *rest) = line.split(Mspire::Sequest::Sqt::Delimiter)[1..-1]
      next if ky.nil? # a bare leader with no key carries no information
      # just in case they have any tabs in their field
      value = rest.join(Mspire::Sequest::Sqt::Delimiter)
      if Arrayed.include?(ky)
        # create the array on demand so from_lines also works when it is
        # called without from_handle having pre-seeded the arrayed keys
        (self[ky] ||= []) << value
      elsif self.key? ky # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
219
+ end
220
+ end
221
+ end
222
+
223
+ # all are cast as expected (total_intensity is a float)
224
+ # mh = observed mh
225
+ Mspire::Sequest::Sqt::Spectrum = Struct.new(* %w(first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches).map(&:to_sym) )
226
+
227
+ # 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
228
+
229
+ class Mspire::Sequest::Sqt::Spectrum
230
+ Leader = 'S'
231
+
232
+ # assumes the first line starts with an 'S'
233
# Reads spectrum ('S'), match ('M') and locus ('L') lines from fh until
# EOF and links them: each match belongs to the most recent spectrum and
# each locus to the most recent match. Lines with any other leader are
# ignored.
#
# fh                 - IO positioned at the first 'S' line
# base_name          - stored on every match (index 15)
# percolator_results - when true, matches are Match::Percolator objects
#
# Every match is appended to its spectrum's matches exactly once (it used
# to be appended twice, which doubled the list and made the deltacn shift
# in set_deltacn copy a match's own deltacn_orig).
#
# Returns [spectra, peptides] where peptides is the flat list of matches.
def self.spectra_from_handle(fh, base_name, percolator_results=false)
  peptides = []
  spectra = []
  spectrum = nil
  matches = nil
  loci = nil

  while line = fh.gets
    case line[0,1]
    when Mspire::Sequest::Sqt::Spectrum::Leader
      spectrum = Mspire::Sequest::Sqt::Spectrum.new.from_line( line )
      spectra << spectrum
      matches = []
      spectrum.matches = matches
    when Mspire::Sequest::Sqt::Match::Leader
      match_klass = if percolator_results
                      Mspire::Sequest::Sqt::Match::Percolator
                    else
                      Mspire::Sequest::Sqt::Match
                    end
      match = match_klass.new.from_line( line )
      # structs cannot set multiple values at a time :(
      match[10] = spectrum[0] # first_scan
      match[11] = spectrum[1] # last_scan
      match[12] = spectrum[2] # charge
      match[15] = base_name
      loci = []
      match.loci = loci
      matches << match
      peptides << match
    when Mspire::Sequest::Sqt::Locus::Leader
      line.chomp!
      loci << Mspire::Sequest::Sqt::Locus.from_line( line )
    end
  end
  # set the deltacn:
  set_deltacn(spectra)
  [spectra, peptides]
end
273
+
274
# For each spectrum, shifts deltacn values up by one match: every match
# receives the deltacn_orig of the match after it, and the last match gets
# 1.1. Spectra without matches are left alone. Returns the spectra array.
def self.set_deltacn(spectra)
  spectra.each do |spectrum|
    hits = spectrum.matches
    next unless hits.size > 0

    hits.each_cons(2) do |current, following|
      current.deltacn = following.deltacn_orig
    end
    hits.last.deltacn = 1.1
  end
  spectra
end
287
+
288
+
289
+ # fills in this spectrum's fields from an 'S' line (matches is reset to []); returns self
290
# Populates members 0..8 from the tab-separated fields 1..9 of the line
# (the line is chomped in place) and resets member 9 to an empty array.
# Field 5 is stored as-is; the others are cast with to_i / to_f as listed.
# Returns self.
def from_line(line)
  line.chomp!
  fields = line.split(Mspire::Sequest::Sqt::Delimiter)
  # cast applied to fields[index + 1]; nil means "keep the raw string"
  casts = [:to_i, :to_i, :to_i, :to_f, nil, :to_f, :to_f, :to_f, :to_i]
  casts.each_with_index do |cast, index|
    raw = fields[index + 1]
    self[index] = cast ? raw.send(cast) : raw
  end
  self[9] = []
  self
end
305
+ end
306
+
307
+ # Sqt format uses only indices 0 - 9
308
+ Mspire::Sequest::Sqt::Match = Struct.new( *%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci].map(&:to_sym) )
309
+
310
+ # 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
311
+
312
+ # rxcorr = rank by xcorr
313
+ # rsp = rank by sp
314
+ # NOTE:
315
+ # deltacn_orig
316
+ # deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
317
+ # give the last one 1.1)
318
+ class Mspire::Sequest::Sqt::Match
319
+ Leader = 'M'
320
+
321
+ # same as 'loci'
322
# Alias-style reader for the loci member (member 16 of the Match struct).
def proteins
  self[:loci]
end
325
+
326
# Populates members 0..9 from the tab-separated fields 1..10 of the line
# (the line is chomped in place); fields 9 and 10 are stored as raw
# strings, the rest cast with to_i / to_f as listed. Member 14 (aaseq) is
# derived from member 8 (sequence) via
# Mspire::Ident::Peptide.sequence_to_aaseq. Returns self.
def from_line(line)
  line.chomp!
  fields = line.split(Mspire::Sequest::Sqt::Delimiter)
  # cast applied to fields[index + 1]; nil means "keep the raw string"
  casts = [:to_i, :to_i, :to_f, :to_f, :to_f, :to_f, :to_i, :to_i, nil, nil]
  casts.each_with_index do |cast, index|
    raw = fields[index + 1]
    self[index] = cast ? raw.send(cast) : raw
  end
  self[14] = Mspire::Ident::Peptide.sequence_to_aaseq(self[8])
  self
end
342
+ end
343
+
344
+
345
# Match variant for percolator output: the xcorr slot holds the percolator
# score and the sp slot holds the negated q-value.
class Mspire::Sequest::Sqt::Match::Percolator < Mspire::Sequest::Sqt::Match
  # we will keep access to these old terms since we can then access routines
  # that sort on xcorr...
  #undef_method :xcorr
  #undef_method :xcorr=
  #undef_method :sp
  #undef_method :sp=

  # stored in the xcorr member
  def percolator_score
    self[:xcorr]
  end

  def percolator_score=(score)
    self[:xcorr] = score
  end

  # stored in the sp member
  def negative_q_value
    self[:sp]
  end

  def negative_q_value=(arg)
    self[:sp] = arg
  end

  # sign-flipped negative_q_value
  def q_value
    -self[:sp]
  end

  # for compatibility with scripts that want this guy
  def probability
    -self[:sp]
  end
end
373
+
374
+ Mspire::Sequest::Sqt::Locus = Struct.new( :locus, :description )
375
+
376
+ class Mspire::Sequest::Sqt::Locus
377
+ Leader = 'L'
378
+
379
# three names for the locus member
def first_entry
  self[:locus]
end

def reference
  self[:locus]
end

def id
  self[:locus]
end
382
+
383
# Sets the two struct members, locus and description.
# The third argument is accepted but never used by this method.
# NOTE(review): NEW_PROT passes a peptide list here — presumably for
# signature compatibility with other protein classes; confirm it is meant
# to be dropped.
def initialize(locus=nil, description=nil, peptides=[])
  super(locus, description)
end
386
+
387
+ # returns a new Locus object
388
# Builds a new Locus from an 'L' line of the form
# "L<tab>locus<tab>description" (the description is optional).
# The line is chomped in place.
#
# The leading 'L' field is skipped; splitting the whole line and passing
# every field on made the leader the locus and the locus the description.
# The split is limited to three fields so a description containing tabs
# stays intact.
def self.from_line(line)
  line.chomp!
  _leader, locus, description = line.split(Mspire::Sequest::Sqt::Delimiter, 3)
  self.new(locus, description)
end
392
+
393
+ end
@@ -0,0 +1,21 @@
1
module Mspire ; end
module Mspire::Ident ; end

class Mspire::Ident::Pepxml
  class SearchHit
    # The sequest-specific search scores attached to a search hit.
    Sequest = Struct.new(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank) do

      # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
      def self.split_ions(ions)
        ions.split("/").map(&:to_i)
      end

      # Calls builder.search_score(:name => member, :value => value) once
      # per struct member, in member order. Returns nil.
      def to_xml(builder)
        members.each_with_index do |score_name, index|
          builder.search_score(:name => score_name, :value => self[index])
        end
        nil
      end
    end
  end
end
21
+