mspire-sequest 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
require 'mspire/ident/pepxml/search_hit/modification_info'
|
2
|
+
|
3
|
+
module Mspire ; end
|
4
|
+
module Mspire::Sequest ; end
|
5
|
+
class Mspire::Sequest::Pepxml ; end
|
6
|
+
|
7
|
+
class Mspire::Sequest::Pepxml::Modifications
|
8
|
+
# sequest params object
|
9
|
+
attr_accessor :params
|
10
|
+
# array holding AAModifications
|
11
|
+
attr_accessor :aa_mods
|
12
|
+
# array holding TerminalModifications
|
13
|
+
attr_accessor :term_mods
|
14
|
+
# a hash of all differential modifications present by aa_one_letter_symbol
|
15
|
+
# and special_symbol. This is NOT the mass difference but the total mass {
|
16
|
+
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
17
|
+
# the amino acid sequence, they are given the *differential* mass. The
|
18
|
+
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
19
|
+
# cterminus ']' => 14.55 # nterminus
|
20
|
+
attr_accessor :aa_mod_to_tot_mass
|
21
|
+
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
22
|
+
# values are the special_symbols
|
23
|
+
attr_accessor :mod_symbols_hash
|
24
|
+
|
25
|
+
# returns an array of all modifications (aa_mods, then term_mods)
|
26
|
+
# All modifications as one new array: the amino-acid modifications
# first, followed by the terminal modifications.
def modifications
  [aa_mods, term_mods].inject(:+)
end
|
29
|
+
|
30
|
+
# The modification symbols string looks like this:
|
31
|
+
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
32
|
+
# ct is cterminal peptide (differential)
|
33
|
+
# nt is nterminal peptide (differential)
|
34
|
+
# the C is just cysteine
|
35
|
+
# will set_modifications and aa_mod_to_tot_mass hash
|
36
|
+
# Stores the sequest params object and, when one is given, immediately
# derives the modification lists and lookup hashes from it together
# with the modification symbols string.
def initialize(params=nil, modification_symbols_string='')
  @params = params
  return unless @params
  set_modifications(params, modification_symbols_string)
end
|
42
|
+
|
43
|
+
# set the aa_mod_to_tot_mass and mod_symbols_hash from the modification symbols string
|
44
|
+
# Rebuilds @aa_mod_to_tot_mass and @mod_symbols_hash from a string such as
#   (M* +15.90000) (S@ +80.00000) (ct[ +12.33000) (nt] +14.20000)
# Both hashes are reset first; a nil or empty string leaves them empty
# and returns nil.
def set_hashes(modification_symbols_string)
  @mod_symbols_hash = {}
  @aa_mod_to_tot_mass = {}
  return nil if modification_symbols_string == nil || modification_symbols_string == ''

  residue_masses = @params.mass_index(:precursor)
  modification_symbols_string.split(/\)\s+\(/).each do |entry|
    parsed = /\(?(\w+)(.) (.[\d\.]+)\)?/.match(entry)
    next unless parsed

    target = parsed[1]
    symbol = parsed[2]
    delta  = parsed[3].to_f
    if %w[ct nt].include?(target)
      # termini: keyed by the bare symbol and stored as the differential mass
      @aa_mod_to_tot_mass[symbol] = delta
      @mod_symbols_hash[[target.to_sym, delta]] = symbol.dup
    else
      # one entry per residue letter: residue mass plus the differential
      target.each_char do |residue|
        residue_key = residue.to_sym
        @aa_mod_to_tot_mass[residue + symbol] = delta + residue_masses[residue_key]
        @mod_symbols_hash[[residue_key, delta]] = symbol
      end
    end
  end
end
|
72
|
+
# returns an array of static mod objects and static terminal mod objects
|
73
|
+
# Builds the static (always applied) modifications from params.mods.
# Entries whose value converts to 0.0 are ignored. A key matching
# /add_(\w)_/ describes a single residue; any other key is treated as a
# terminal modification.
#
# returns [array of AminoacidModification, array of TerminalModification]
def create_static_mods(params)
  residue_entries  = []  # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
  terminal_entries = []  # [[params key (e.g. add_Cterm_peptide), add_amount.to_f], ...]

  params.mods.each do |name, raw_amount|
    amount = raw_amount.to_f
    next if amount.zero?
    if name =~ /add_(\w)_/
      residue_entries << [Regexp.last_match(1).to_sym, amount]
    else
      terminal_entries << [name, amount]
    end
  end

  precursor_masses = params.mass_index(:precursor)

  aminoacid_mods = residue_entries.map do |residue, amount|
    attrs = {
      :aminoacid => residue.to_s,
      :massdiff => amount,
      :mass => precursor_masses[residue] + amount,
      :variable => 'N',
      :binary => 'Y',
    }
    Mspire::Ident::Pepxml::AminoacidModification.new(attrs)
  end

  terminal_mods = terminal_entries.map do |name, amount|
    # only two possible termini
    terminus = (name =~ /Cterm/) ? 'c' : 'n'
    protein_terminus =
      if name =~ /Nterm_protein/
        'n'
      elsif name =~ /Cterm_protein/
        'c'
      end

    attrs = {
      :terminus => terminus,
      :massdiff => amount,
      :variable => 'N',
      :description => name,
    }
    attrs[:protein_terminus] = protein_terminus if protein_terminus
    Mspire::Ident::Pepxml::TerminalModification.new(attrs)
  end

  [aminoacid_mods, terminal_mods]
end
|
129
|
+
|
130
|
+
# 1. sets aa_mods and term_mods from a sequest params object
|
131
|
+
# 2. sets @params
|
132
|
+
# 3. sets @aa_mod_to_tot_mass
|
133
|
+
# Derives every modification object from a sequest params object:
# 1. sets @params
# 2. sets @aa_mod_to_tot_mass and @mod_symbols_hash (via set_hashes)
# 3. sets @aa_mods (static then variable amino-acid modifications)
# 4. sets @term_mods (static then variable terminal modifications)
#
# params is expected to respond to mass_index, diff_search_options and
# term_diff_search_options (plus whatever create_static_mods reads).
def set_modifications(params, modification_symbols_string)
  @params = params

  set_hashes(modification_symbols_string)
  static_mods, static_terminal_mods = create_static_mods(params)

  aa_hash = params.mass_index(:precursor)

  #################################
  # Variable Mods:
  #################################
  # diff_search_options is a flat "mass residues mass residues ..." list;
  # strip both ends so leading whitespace cannot shift the pairing.
  variable_mods = []
  params.diff_search_options.strip.split(/\s+/).each_slice(2) do |mass_string, residues|
    mass = mass_string.to_f
    next if mass.zero?
    # one modification object per residue letter listed for this mass
    residues.each_char do |aa|
      attrs = {
        :aminoacid => aa,
        :massdiff => mass,
        :mass => aa_hash[aa.to_sym] + mass,
        :variable => 'Y',
        :binary => 'N',
        :symbol => @mod_symbols_hash[[aa.to_sym, mass]],
      }
      variable_mods << Mspire::Ident::Pepxml::AminoacidModification.new(attrs)
    end
  end

  #################################
  # TERMINAL Variable Mods:
  #################################
  # These are always peptide, not protein termini (for sequest)
  nterm_diff, cterm_diff = params.term_diff_search_options.strip.split(/\s+/).map { |v| v.to_f }

  variable_terminal_mods = []
  [['n', :nt, nterm_diff], ['c', :ct, cterm_diff]].each do |terminus, key, diff|
    # diff is nil when the option line holds fewer than two values
    next if diff.nil? || diff.zero?
    attrs = {
      :terminus => terminus,
      :massdiff => diff.to_plus_minus_string,
      :variable => 'Y',
      # @mod_symbols_hash is keyed by a two-element array, e.g. [:nt, 14.2]
      :symbol => @mod_symbols_hash[[key, diff]],
    }
    variable_terminal_mods << Mspire::Ident::Pepxml::TerminalModification.new(attrs)
  end

  #########################
  # COLLECT THEM
  #########################
  @aa_mods = static_mods + variable_mods
  @term_mods = static_terminal_mods + variable_terminal_mods
end
|
198
|
+
|
199
|
+
# takes a peptide sequence with modifications but no preceding or trailing
|
200
|
+
# amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
|
201
|
+
# returns a ModificationInfo object
|
202
|
+
# if there are no modifications, returns nil
|
203
|
+
# Takes a peptide sequence carrying modification symbols but no flanking
# amino acids (e.g. "]PEPT*IDE", not 'K.PEPTIDE.R') and returns a
# ModificationInfo object, or nil when the peptide carries no
# modification at all (or no differential modifications are known).
#
# A peptide whose only modification is on a terminus also yields a
# ModificationInfo, so the terminal mass that was computed is not lost.
def modification_info(mod_peptide)
  return nil if @aa_mod_to_tot_mass.size == 0
  mod_info = Mspire::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
  mass_table = @params.mass_index(:precursor)
  terminal_mod_found = false

  # TERMINI:
  ## only the termini can match a single char
  nterm_symbol = mod_peptide[0,1]
  if @aa_mod_to_tot_mass.key?(nterm_symbol)
    # AA + H + differential_mod
    mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[nterm_symbol]
    mod_peptide = mod_peptide[1...(mod_peptide.size)]
    terminal_mod_found = true
  end
  cterm_symbol = mod_peptide[(mod_peptide.size-1),1]
  if @aa_mod_to_tot_mass.key?(cterm_symbol)
    # AA + OH + differential_mod
    mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[cterm_symbol]
    mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
    terminal_mod_found = true
  end

  # OTHER DIFFERENTIAL MODS:
  # walk the string; a char that forms a known "AA+symbol" key with the
  # last plain residue is a modification of that residue, otherwise it
  # is the next plain residue (and the 1-based position advances).
  mod_array = []
  position = 1
  last_normal_aa = mod_peptide[0,1]
  (1...mod_peptide.size).each do |i|
    candidate = last_normal_aa + mod_peptide[i,1]
    if @aa_mod_to_tot_mass.key?(candidate)
      mod_array << Mspire::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, @aa_mod_to_tot_mass[candidate])
    else
      last_normal_aa = mod_peptide[i,1]
      position += 1
    end
  end

  return nil if mod_array.empty? && !terminal_mod_found
  mod_info.mod_aminoacid_masses = mod_array unless mod_array.empty?
  mod_info
end
|
244
|
+
|
245
|
+
|
246
|
+
end
|
247
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
module Mspire ; end
module Mspire::Sequest ; end

# pepXML-related additions to the sequest params class. Relies on the
# object's own `enzyme` and `enzyme_specificity` readers.
class Mspire::Sequest::Params

  # returns a Mspire::Ident::Pepxml::SampleEnzyme object
  def sample_enzyme
    attributes = sample_enzyme_hash
    Mspire::Ident::Pepxml::SampleEnzyme.new(attributes)
  end

  # returns a hash suitable for setting a Mspire::Ident::Pepxml::SampleEnzyme object
  #
  # Keys: :name, :cut, :no_cut and :sense. Empty-string specificity fields
  # become nil. :sense is nil for "No_Enzyme", 'C' for offset 1, 'N' for
  # offset 0 and nil for any other offset.
  def sample_enzyme_hash
    offset, cleave_at, except_if_after = enzyme_specificity.map do |field|
      field == '' ? nil : field
    end
    enzyme_name = self.enzyme

    sense = nil
    unless enzyme_name == "No_Enzyme"
      if offset == 1
        sense = 'C'
      elsif offset == 0
        sense = 'N'
      end
    end

    { :name => enzyme_name, :cut => cleave_at, :no_cut => except_if_after, :sense => sense }
  end

end
|
@@ -0,0 +1,393 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
require 'mspire/fasta'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
require 'mspire/ident/peptide'
|
7
|
+
require 'mspire/ident/search'
|
8
|
+
|
9
|
+
module Mspire
|
10
|
+
module Sequest
|
11
|
+
class SqtGroup
|
12
|
+
include Mspire::Ident::SearchGroup
|
13
|
+
|
14
|
+
#attr_accessor :sqts, :filenames
|
15
|
+
|
16
|
+
def search_class
|
17
|
+
Mspire::Sequest::Sqt
|
18
|
+
end
|
19
|
+
|
20
|
+
def extension() 'sqg' end
|
21
|
+
|
22
|
+
# Builds the group through the SearchGroup initializer with protein
# linking switched off for the individual searches. Unless the caller
# passed :link_protein_hits => false, the peptides of all searches are
# then merged once for the whole group. Finally yields self to the
# caller's block, if one was given.
def initialize(arg, opts={}, &block)
  # remember what the caller asked for before the option is overridden
  group_linking = !(opts.dup[:link_protein_hits] == false)
  per_search_opts = opts.merge({ :link_protein_hits => false })
  super(arg, per_search_opts) do
    if group_linking
      puts "MERGING GROUP!"
      peptide_lists = @searches.map {|search| search.peptides }
      (@peptides, @proteins) = merge!(peptide_lists, &Mspire::Sequest::Sqt::NEW_PROT)
    end
  end
  yield(self) if block_given?
end
|
33
|
+
|
34
|
+
|
35
|
+
# # NOTE THAT this is copy/paste from srf.rb, should be refactored...
|
36
|
+
## returns the filename used
|
37
|
+
## if the file exists, the name will be expanded to full path, otherwise just
|
38
|
+
## what is given
|
39
|
+
#def to_sqg(sqg_filename='bioworks.sqg')
|
40
|
+
#File.open(sqg_filename, 'w') do |v|
|
41
|
+
#@filenames.each do |sqt_file|
|
42
|
+
#if File.exist? sqt_file
|
43
|
+
#v.puts File.expand_path(sqt_file)
|
44
|
+
#else
|
45
|
+
#v.puts sqt_file
|
46
|
+
#end
|
47
|
+
#end
|
48
|
+
#end
|
49
|
+
#sqg_filename
|
50
|
+
#end
|
51
|
+
|
52
|
+
end # SqtGroup
|
53
|
+
|
54
|
+
|
55
|
+
class Sqt
|
56
|
+
include Mspire::Ident::SearchLike
|
57
|
+
PercolatorHeaderMatch = /^Percolator v/
|
58
|
+
Delimiter = "\t"
|
59
|
+
attr_accessor :header
|
60
|
+
attr_accessor :spectra
|
61
|
+
attr_accessor :base_name
|
62
|
+
# boolean
|
63
|
+
attr_accessor :percolator_results
|
64
|
+
|
65
|
+
# returns [sequence_length, locus_count] of the fasta file
|
66
|
+
# Walks every entry of the fasta file and returns
# [summed length of all sequences, number of entries].
def self.db_seq_length_and_locus_count(dbfile)
  totals = [0, 0]
  Mspire::Fasta.open(dbfile) do |fasta|
    fasta.each do |entry|
      totals[0] += entry.sequence.size
      totals[1] += 1
    end
  end
  totals
end
|
77
|
+
|
78
|
+
#--
|
79
|
+
# this is implemented separate from sequence length because seq length
|
80
|
+
# uses Archive which doesn't preserve carriage returns and newlines.
|
81
|
+
#++
|
82
|
+
# Returns the MD5 hex digest of the database file's contents.
#
# The checksum must cover the exact bytes on disk (carriage returns and
# newlines included), so hashing is delegated to Digest::MD5.file
# instead of reading through a text-mode File handle.
def self.db_md5sum(dbfile)
  Digest::MD5.file(dbfile).hexdigest
end
|
92
|
+
|
93
|
+
# assumes the file exists and is readable
|
94
|
+
# returns [DBSeqLength, DBLocusCount, DBMD5Sum]
|
95
|
+
# assumes the file exists and is readable
# returns the three member array [DBSeqLength, DBLocusCount, DBMD5Sum]
def self.db_info(dbfile)
  info = self.db_seq_length_and_locus_count(dbfile)
  checksum = self.db_md5sum(dbfile)
  info.push(checksum)
end
|
99
|
+
|
100
|
+
def protein_class
|
101
|
+
Mspire::Sequest::Sqt::Locus
|
102
|
+
end
|
103
|
+
|
104
|
+
# opts =
|
105
|
+
# :percolator_results => false | true (default false)
|
106
|
+
# :link_protein_hits => true | false (default true)
|
107
|
+
# Creates an Sqt object; when a filename is given the file is read
# immediately through from_file with the same opts.
def initialize(filename=nil, opts={})
  from_file(filename, opts) if filename
end
|
113
|
+
|
114
|
+
# Builds a fresh Locus carrying the locus and description of an existing
# protein together with the given peptides.
NEW_PROT = ->(protein, peptide_hits) do
  locus_class = Mspire::Sequest::Sqt::Locus
  locus_class.new(protein.locus, protein.description, peptide_hits)
end
|
117
|
+
|
118
|
+
# if the file contains the header key '/$Percolator v/' then the results
|
119
|
+
# will be interpreted as percolator results regardless of the value
|
120
|
+
# passed in.
|
121
|
+
# Reads an SQT file: sets @percolator_results, @base_name (file name
# without directory or extension), @header, @spectra and @peptides.
# A header key matching PercolatorHeaderMatch forces percolator parsing
# regardless of the :percolator_results option.
def from_file(filename, opts={})
  defaults = {:percolator_results=>false, :link_protein_hits => true}
  opts = defaults.merge(opts)
  @percolator_results = opts[:percolator_results]
  # normalise windows separators before taking the basename
  unix_style_path = filename.gsub('\\','/')
  @base_name = File.basename(unix_style_path).sub(/\.\w+$/, '')
  File.open(filename) do |fh|
    @header = Mspire::Sequest::Sqt::Header.new.from_handle(fh)
    @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
    (@spectra, @peptides) = Mspire::Sequest::Sqt::Spectrum.spectra_from_handle(fh, @base_name, @percolator_results)
  end
end
|
133
|
+
|
134
|
+
|
135
|
+
# Inherits from hash, so all header stuff can be accessed by key. Multiline
|
136
|
+
# values will be pushed into an array.
|
137
|
+
# All header values are stored as (newline-removed) strings!
|
138
|
+
# The header section of an SQT file. Inherits from Hash, so every header
# field can be read by its key; the fields listed in HeaderKeys are also
# available through attribute readers.
# All header values are stored as (newline-removed) strings!
class Header < Hash
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }

  KeysToAtts = HeaderKeys.invert

  attr_accessor(*HeaderKeys.keys)

  # Consumes the leading run of header lines from an IO and loads them.
  # The handle is left positioned at the first non-header line (or at
  # end of file). Returns self.
  def from_handle(fh)
    Arrayed.each do |ky|
      self[ky] = []
    end
    lines = []
    loop do
      pos = fh.pos
      line = fh.gets
      if line && (line[0,1] == Leader)
        lines << line
      else # rewind to the start of the line just read and we're done
        fh.pos = pos
        break
      end
    end
    from_lines(lines)
  end

  # Loads the given header lines (each "H<tab>Key<tab>value...") into the
  # hash and refreshes the attribute readers. The lines themselves are
  # not modified. Returns self.
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |line|
      (ky, *rest) = line.chomp.split(Mspire::Sequest::Sqt::Delimiter)[1..-1]
      # just in case they have any tabs in their field
      value = rest.join(Mspire::Sequest::Sqt::Delimiter)
      if Arrayed.include?(ky)
        # create the array on demand so from_lines also works on its own
        (self[ky] ||= []) << value
      elsif self.key? ky # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
# all are cast as expected (total_intensity is a float)
|
224
|
+
# mh = observed mh
|
225
|
+
Mspire::Sequest::Sqt::Spectrum = Struct.new(* %w(first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches).map(&:to_sym) )
|
226
|
+
|
227
|
+
# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
|
228
|
+
|
229
|
+
class Mspire::Sequest::Sqt::Spectrum
|
230
|
+
Leader = 'S'
|
231
|
+
|
232
|
+
# assumes the first line starts with an 'S'
|
233
|
+
# Reads the S/M/L records from an IO positioned after the header.
# assumes the first line starts with an 'S'
#
# Each 'S' line starts a spectrum, each 'M' line adds one match to the
# current spectrum (exactly once) and each 'L' line adds a locus to the
# current match. Matches also receive the spectrum's first three fields
# (indices 10-12) and base_name (index 15). Lines with any other leader
# are ignored.
#
# returns [spectra, peptides] where peptides is the flat list of all matches
def self.spectra_from_handle(fh, base_name, percolator_results=false)
  peptides = []
  spectra = []
  spectrum = nil
  matches = nil
  loci = nil

  while line = fh.gets
    case line[0,1]
    when Mspire::Sequest::Sqt::Spectrum::Leader
      spectrum = Mspire::Sequest::Sqt::Spectrum.new.from_line( line )
      spectra << spectrum
      matches = []
      spectrum.matches = matches
    when Mspire::Sequest::Sqt::Match::Leader
      match_klass = if percolator_results
                      Mspire::Sequest::Sqt::Match::Percolator
                    else
                      Mspire::Sequest::Sqt::Match
                    end
      match = match_klass.new.from_line( line )
      # structs cannot set multiple values at a time :(
      match[10] = spectrum[0]
      match[11] = spectrum[1]
      match[12] = spectrum[2]
      match[15] = base_name
      loci = []
      match.loci = loci
      matches << match
      peptides << match
    when Mspire::Sequest::Sqt::Locus::Leader
      line.chomp!
      loci << Mspire::Sequest::Sqt::Locus.from_line( line )
    end
  end
  # set the deltacn:
  set_deltacn(spectra)
  [spectra, peptides]
end
|
273
|
+
|
274
|
+
# Sets the adjusted deltacn on every match: each match takes the
# deltacn_orig of the match that follows it within the same spectrum and
# the last match of a spectrum gets 1.1. Returns the spectra it was given.
def self.set_deltacn(spectra)
  spectra.each do |spec|
    hits = spec.matches
    next unless hits.size > 0
    hits.each_cons(2) do |current, following|
      current.deltacn = following.deltacn_orig
    end
    hits[-1].deltacn = 1.1
  end
  spectra
end
|
287
|
+
|
288
|
+
|
289
|
+
# Fills this spectrum from one 'S' line (the line is chomped in place)
# and returns self. Fields after the leader are cast as: first_scan,
# last_scan, charge -> Integer; time_to_process -> Float; node -> String
# as read; mh, total_intensity, lowest_sp -> Float;
# num_matched_peptides -> Integer. matches is reset to an empty array.
def from_line(line)
  line.chomp!
  _leader, first_scan, last_scan, charge, seconds, node, mh, intensity, lowest_sp, num_matched =
    line.split(Mspire::Sequest::Sqt::Delimiter)
  self[0] = first_scan.to_i
  self[1] = last_scan.to_i
  self[2] = charge.to_i
  self[3] = seconds.to_f
  self[4] = node
  self[5] = mh.to_f
  self[6] = intensity.to_f
  self[7] = lowest_sp.to_f
  self[8] = num_matched.to_i
  self[9] = []
  self
end
|
305
|
+
end
|
306
|
+
|
307
|
+
# Sqt format uses only indices 0 - 9
|
308
|
+
Mspire::Sequest::Sqt::Match = Struct.new( *%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci].map(&:to_sym) )
|
309
|
+
|
310
|
+
# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
|
311
|
+
|
312
|
+
# rxcorr = rank by xcorr
|
313
|
+
# rsp = rank by sp
|
314
|
+
# NOTE:
|
315
|
+
# deltacn_orig
|
316
|
+
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
|
317
|
+
# give the last one 1.1)
|
318
|
+
class Mspire::Sequest::Sqt::Match
|
319
|
+
Leader = 'M'
|
320
|
+
|
321
|
+
# same as 'loci'
|
322
|
+
def proteins
|
323
|
+
self[16]
|
324
|
+
end
|
325
|
+
|
326
|
+
# Fills indices 0-9 of this match from one 'M' line (the line is chomped
# in place), derives aaseq (index 14) from the sequence field and
# returns self.
def from_line(line)
  line.chomp!
  cols = line.split(Mspire::Sequest::Sqt::Delimiter)
  # rxcorr rsp | mh deltacn_orig xcorr sp | ions_matched ions_total
  casts = [:to_i, :to_i, :to_f, :to_f, :to_f, :to_f, :to_i, :to_i]
  casts.each_with_index do |cast, idx|
    self[idx] = cols[idx + 1].send(cast)
  end
  self[8] = cols[9]   # sequence, kept as read
  self[9] = cols[10]  # manual_validation_status, kept as read
  self[14] = Mspire::Ident::Peptide.sequence_to_aaseq(self[8])
  self
end
|
342
|
+
end
|
343
|
+
|
344
|
+
|
345
|
+
# A match read from percolator output. Slot 4 (xcorr for a plain match)
# holds the percolator score and slot 5 (sp) holds the negative q-value.
# The xcorr/sp accessors are deliberately left in place so that routines
# sorting on xcorr keep working.
class Mspire::Sequest::Sqt::Match::Percolator < Mspire::Sequest::Sqt::Match

  def percolator_score
    self[:xcorr]
  end

  def percolator_score=(score)
    self[:xcorr] = score
  end

  def negative_q_value
    self[:sp]
  end

  def negative_q_value=(arg)
    self[:sp] = arg
  end

  # the stored value with its sign flipped
  def q_value
    -self[:sp]
  end

  # for compatibility with scripts that want this guy
  def probability
    -self[:sp]
  end
end
|
373
|
+
|
374
|
+
Mspire::Sequest::Sqt::Locus = Struct.new( :locus, :description )
|
375
|
+
|
376
|
+
class Mspire::Sequest::Sqt::Locus
|
377
|
+
Leader = 'L'
|
378
|
+
|
379
|
+
def first_entry ; self[0] end
|
380
|
+
def reference ; self[0] end
|
381
|
+
def id ; self[0] end
|
382
|
+
|
383
|
+
def initialize(locus=nil, description=nil, peptides=[])
|
384
|
+
super(locus, description)
|
385
|
+
end
|
386
|
+
|
387
|
+
# returns a new Locus object
|
388
|
+
# Builds a new Locus from one 'L' line ("L<tab>locus<tab>description").
# The leading record marker is dropped so that locus and description end
# up in the right slots; a description containing tabs is kept whole.
# The line passed in is not modified.
def self.from_line(line)
  _leader, locus, description = line.chomp.split(Mspire::Sequest::Sqt::Delimiter, 3)
  self.new(locus, description)
end
|
392
|
+
|
393
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Mspire ; end
module Mspire::Ident ; end

class Mspire::Ident::Pepxml
  class SearchHit
    # The sequest-specific scores attached to a search hit.
    Sequest = Struct.new(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank) do

      # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
      def self.split_ions(ions)
        ions.split("/").map(&:to_i)
      end

      # Emits one search_score call on the builder per member, in member
      # order, passing the member name and its value. Returns nil.
      def to_xml(builder)
        each_pair do |score_name, score_value|
          builder.search_score(:name => score_name, :value => score_value)
        end
        nil
      end
    end
  end
end
|
21
|
+
|