mspire-sequest 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,247 @@
|
|
1
|
+
require 'mspire/ident/pepxml/search_hit/modification_info'
|
2
|
+
|
3
|
+
module Mspire ; end
|
4
|
+
module Mspire::Sequest ; end
|
5
|
+
class Mspire::Sequest::Pepxml ; end
|
6
|
+
|
7
|
+
class Mspire::Sequest::Pepxml::Modifications
|
8
|
+
# sequest params object
|
9
|
+
attr_accessor :params
|
10
|
+
# array holding AAModifications
|
11
|
+
attr_accessor :aa_mods
|
12
|
+
# array holding TerminalModifications
|
13
|
+
attr_accessor :term_mods
|
14
|
+
# a hash of all differential modifications present by aa_one_letter_symbol
|
15
|
+
# and special_symbol. This is NOT the mass difference but the total mass {
|
16
|
+
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
17
|
+
# the amino acid sequence, they are given the *differential* mass. The
|
18
|
+
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
19
|
+
# cterminus ']' => 14.55 # nterminus
|
20
|
+
attr_accessor :aa_mod_to_tot_mass
|
21
|
+
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
22
|
+
# values are the special_symbols
|
23
|
+
attr_accessor :mod_symbols_hash
|
24
|
+
|
25
|
+
# returns an array of all modifications (aa_mods, then term_mods)
|
26
|
+
# Combined list of modifications: amino acid mods first, terminal mods
# last. A new array is returned; neither source array is altered.
def modifications
  amino_acid_mods = aa_mods
  amino_acid_mods + term_mods
end
|
29
|
+
|
30
|
+
# The modification symbols string looks like this:
|
31
|
+
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
32
|
+
# ct is cterminal peptide (differential)
|
33
|
+
# nt is nterminal peptide (differential)
|
34
|
+
# the C is just cysteine
|
35
|
+
# will set_modifications and aa_mod_to_tot_mass hash
|
36
|
+
# params                      - a sequest params object, or nil to defer setup
# modification_symbols_string - the symbols string described above
#
# When params is given, set_modifications is run immediately.
def initialize(params=nil, modification_symbols_string='')
  @params = params
  return unless @params
  set_modifications(params, modification_symbols_string)
end
|
42
|
+
|
43
|
+
# set the aa_mod_to_tot_mass and mod_symbols_hash from the modification symbols string
|
44
|
+
# Resets @mod_symbols_hash and @aa_mod_to_tot_mass, then fills them from a
# string such as "(M* +15.90000) (ST@ +80.00000) (ct[ +12.33000)".
#
# * Terminal entries ('ct' / 'nt') store the *differential* mass under the
#   bare symbol (e.g. '[') and the symbol under [:ct, diff] / [:nt, diff].
# * Residue entries store, for every listed amino acid, the *total* mass
#   (precursor mass + diff) under "<aa><symbol>" and the symbol under
#   [aa_as_symbol, diff].
#
# Returns nil when the string is nil or empty (both hashes are left empty).
def set_hashes(modification_symbols_string)
  @mod_symbols_hash = {}
  @aa_mod_to_tot_mass = {}
  return nil if modification_symbols_string.nil? || modification_symbols_string == ''

  precursor_masses = @params.mass_index(:precursor)
  modification_symbols_string.split(/\)\s+\(/).each do |entry|
    parsed = /\(?(\w+)(.) (.[\d\.]+)\)?/.match(entry)
    next unless parsed

    target = parsed[1]
    symbol = parsed[2]
    delta = parsed[3].to_f

    if %w(ct nt).include?(target)
      @aa_mod_to_tot_mass[symbol] = delta
      @mod_symbols_hash[[target.to_sym, delta]] = symbol.dup
    else
      # several residues may share one symbol, e.g. "ST@"
      target.each_char do |residue|
        residue_sym = residue.to_sym
        @aa_mod_to_tot_mass[residue + symbol] = delta + precursor_masses[residue_sym]
        @mod_symbols_hash[[residue_sym, delta]] = symbol
      end
    end
  end
end
|
72
|
+
# returns an array of static mod objects and static terminal mod objects
|
73
|
+
# Builds the static (non-variable) modification objects described by
# params.mods.
#
# Entries whose value converts to 0.0 are ignored. Keys matching
# /add_(\w)_/ (a single residue letter) become AminoacidModification
# objects; every other key is treated as a terminal modification and
# becomes a TerminalModification.
#
# Returns [static_aa_mods, static_terminal_mods].
def create_static_mods(params)
  residue_pairs = []   # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
  terminal_pairs = []  # [[params key (e.g. add_Cterm_peptide), amount.to_f], ...]

  params.mods.each do |name, raw_value|
    delta = raw_value.to_f
    next if delta == 0.0
    if name =~ /add_(\w)_/
      residue_pairs << [Regexp.last_match(1).to_sym, delta]
    else
      terminal_pairs << [name, delta]
    end
  end

  precursor_masses = params.mass_index(:precursor)

  static_mods = residue_pairs.map do |residue_sym, delta|
    attrs = {
      :aminoacid => residue_sym.to_s,
      :massdiff => delta,
      :mass => precursor_masses[residue_sym] + delta,
      :variable => 'N',
      :binary => 'Y',
    }
    Mspire::Ident::Pepxml::AminoacidModification.new(attrs)
  end

  static_terminal_mods = terminal_pairs.map do |description, delta|
    # anything that is not a C terminus is taken to be an N terminus
    attrs = {
      :terminus => (description =~ /Cterm/ ? 'c' : 'n'),
      :massdiff => delta,
      :variable => 'N',
      :description => description,
    }
    protein_terminus =
      if description =~ /Nterm_protein/
        'n'
      elsif description =~ /Cterm_protein/
        'c'
      end
    attrs[:protein_terminus] = protein_terminus if protein_terminus
    Mspire::Ident::Pepxml::TerminalModification.new(attrs)
  end

  [static_mods, static_terminal_mods]
end
|
129
|
+
|
130
|
+
# 1. sets aa_mods and term_mods from a sequest params object
|
131
|
+
# 2. sets @params
|
132
|
+
# 3. sets @aa_mod_to_tot_mass
|
133
|
+
# Populates @aa_mods and @term_mods from a sequest params object and, via
# set_hashes, the @aa_mod_to_tot_mass / @mod_symbols_hash lookups.
#
# params                      - responds to mods, mass_index,
#                               diff_search_options and
#                               term_diff_search_options
# modification_symbols_string - e.g. "(M* +15.90000) (nt] +14.20000)"
#
# Returns the new @term_mods array.
def set_modifications(params, modification_symbols_string)
  @params = params

  set_hashes(modification_symbols_string)
  (static_mods, static_terminal_mods) = create_static_mods(params)

  aa_hash = params.mass_index(:precursor)

  #################################
  # Variable Mods:
  #################################
  # diff_search_options is a flat "mass residues mass residues ..." list.
  # String#split without arguments also ignores leading whitespace, which
  # would otherwise shift every pair by one field.
  variable_mods = []
  params.diff_search_options.split.each_slice(2) do |mass_string, residues|
    massdiff = mass_string.to_f
    next if massdiff == 0.0
    residues.each_char do |aa|
      hash = {
        :aminoacid => aa,
        :massdiff => massdiff,
        :mass => aa_hash[aa.to_sym] + massdiff,
        :variable => 'Y',
        :binary => 'N',
        :symbol => @mod_symbols_hash[[aa.to_sym, massdiff]],
      }
      variable_mods << Mspire::Ident::Pepxml::AminoacidModification.new(hash)
    end
  end

  #################################
  # TERMINAL Variable Mods:
  #################################
  # These are always peptide, not protein termini (for sequest)
  (nterm_diff, cterm_diff) = params.term_diff_search_options.split.map {|v| v.to_f }

  # set_hashes keys terminal symbols by the two-element array
  # [:nt, diff] / [:ct, diff], so the lookup must pass that array as the
  # single Hash#[] argument. A missing value is treated as "no mod".
  to_add = []
  if nterm_diff && nterm_diff != 0.0
    to_add << ['n', nterm_diff.to_plus_minus_string, @mod_symbols_hash[[:nt, nterm_diff]]]
  end
  if cterm_diff && cterm_diff != 0.0
    to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[[:ct, cterm_diff]]]
  end

  variable_terminal_mods = to_add.map do |term, mssdiff, symb|
    hash = {
      :terminus => term,
      :massdiff => mssdiff,
      :variable => 'Y',
      :symbol => symb,
    }
    Mspire::Ident::Pepxml::TerminalModification.new(hash)
  end

  #########################
  # COLLECT THEM
  #########################
  @aa_mods = static_mods + variable_mods
  @term_mods = static_terminal_mods + variable_terminal_mods
end
|
198
|
+
|
199
|
+
# takes a peptide sequence with modifications but no preceding or trailing
|
200
|
+
# amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
|
201
|
+
# returns a ModificationInfo object
|
202
|
+
# if there are no modifications, returns nil
|
203
|
+
# Builds a ModificationInfo for a peptide string that carries sequest
# modification symbols (e.g. "]PEPT*IDE"), without flanking residues.
#
# mod_peptide - the annotated peptide sequence
#
# Returns nil when no differential modifications are configured or when
# none is found in the peptide. A peptide carrying only a terminal
# modification yields a ModificationInfo with the terminal mass set
# (previously that case returned nil and the terminal mass was lost).
def modification_info(mod_peptide)
  return nil if @aa_mod_to_tot_mass.size == 0
  mod_info = Mspire::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
  mass_table = @params.mass_index(:precursor)
  terminal_mod_found = false

  # TERMINI:
  ## only the termini can match a single char
  if @aa_mod_to_tot_mass.key? mod_peptide[0,1]
    # AA + H + differential_mod
    mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[mod_peptide[0,1]]
    mod_peptide = mod_peptide[1...(mod_peptide.size)]
    terminal_mod_found = true
  end
  if @aa_mod_to_tot_mass.key? mod_peptide[(mod_peptide.size-1),1]
    # AA + OH + differential_mod
    mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[mod_peptide[-1,1]]
    mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
    terminal_mod_found = true
  end

  # OTHER DIFFERENTIAL MODS:
  mod_array = []
  position = 1                       # 1-based index into the bare sequence
  last_normal_aa = mod_peptide[0,1]
  (1...mod_peptide.size).each do |i|
    candidate = last_normal_aa + mod_peptide[i,1]
    if @aa_mod_to_tot_mass.key?(candidate)
      # a symbol following a residue: record the residue's total mass
      mod_array << Mspire::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, @aa_mod_to_tot_mass[candidate])
    else
      last_normal_aa = mod_peptide[i,1]
      position += 1
    end
  end

  return nil if mod_array.empty? && !terminal_mod_found
  mod_info.mod_aminoacid_masses = mod_array unless mod_array.empty?
  mod_info
end
|
244
|
+
|
245
|
+
|
246
|
+
end
|
247
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
module Mspire ; end
module Mspire::Sequest ; end

# pepXML helpers added to the sequest params object. They rely on the
# object's own #enzyme and #enzyme_specificity readers.
class Mspire::Sequest::Params

  # returns a Mspire::Ident::Pepxml::SampleEnzyme object built from
  # #sample_enzyme_hash
  def sample_enzyme
    Mspire::Ident::Pepxml::SampleEnzyme.new(sample_enzyme_hash)
  end

  # returns a hash (:name, :cut, :no_cut, :sense) suitable for setting a
  # Mspire::Ident::Pepxml::SampleEnzyme object. Empty-string specificity
  # fields become nil. :sense is nil for "No_Enzyme", 'C' for offset 1,
  # 'N' for offset 0, and nil for any other offset.
  def sample_enzyme_hash
    offset, cleave_at, except_if_after = enzyme_specificity.map { |field| field == '' ? nil : field }
    enzyme_name = self.enzyme
    sense =
      if enzyme_name == "No_Enzyme"
        nil
      elsif offset == 1
        'C'
      elsif offset == 0
        'N'
      end
    { :name => enzyme_name, :cut => cleave_at, :no_cut => except_if_after, :sense => sense }
  end

end
|
@@ -0,0 +1,393 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
require 'mspire/fasta'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
require 'mspire/ident/peptide'
|
7
|
+
require 'mspire/ident/search'
|
8
|
+
|
9
|
+
module Mspire
|
10
|
+
module Sequest
|
11
|
+
# A group of Sqt searches handled through Mspire::Ident::SearchGroup.
class SqtGroup
  include Mspire::Ident::SearchGroup

  #attr_accessor :sqts, :filenames

  # the class used for each individual search in the group
  def search_class
    Mspire::Sequest::Sqt
  end

  # file extension associated with a group file
  def extension
    'sqg'
  end

  # arg and opts are forwarded to the SearchGroup initializer, always with
  # :link_protein_hits => false for the individual searches. Unless the
  # caller explicitly passed :link_protein_hits => false, the peptides of
  # all searches are merged into @peptides / @proteins inside the block
  # handed to super. A block given to this method is called with self
  # afterwards.
  def initialize(arg, opts={}, &block)
    link_requested = (opts.dup[:link_protein_hits] != false)
    super(arg, opts.merge(:link_protein_hits => false)) do
      if link_requested
        puts "MERGING GROUP!"
        peptide_sets = @searches.map {|search| search.peptides }
        (@peptides, @proteins) = merge!(peptide_sets, &Mspire::Sequest::Sqt::NEW_PROT)
      end
    end
    block.call(self) if block_given?
  end


  # # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  ## returns the filename used
  ## if the file exists, the name will be expanded to full path, otherwise just
  ## what is given
  #def to_sqg(sqg_filename='bioworks.sqg')
  #File.open(sqg_filename, 'w') do |v|
  #@filenames.each do |sqt_file|
  #if File.exist? sqt_file
  #v.puts File.expand_path(sqt_file)
  #else
  #v.puts sqt_file
  #end
  #end
  #end
  #sqg_filename
  #end

end # SqtGroup
|
53
|
+
|
54
|
+
|
55
|
+
class Sqt
|
56
|
+
include Mspire::Ident::SearchLike
|
57
|
+
PercolatorHeaderMatch = /^Percolator v/
|
58
|
+
Delimiter = "\t"
|
59
|
+
attr_accessor :header
|
60
|
+
attr_accessor :spectra
|
61
|
+
attr_accessor :base_name
|
62
|
+
# boolean
|
63
|
+
attr_accessor :percolator_results
|
64
|
+
|
65
|
+
# returns [sequence_length, locus_count] of the fasta file
|
66
|
+
# Walks every entry of the fasta file once.
#
# dbfile - path handed to Mspire::Fasta.open
#
# Returns [summed sequence length, number of entries].
def self.db_seq_length_and_locus_count(dbfile)
  residue_total = 0
  entry_total = 0
  Mspire::Fasta.open(dbfile) do |fasta|
    fasta.each do |entry|
      residue_total += entry.sequence.size
      entry_total += 1
    end
  end
  [residue_total, entry_total]
end
|
77
|
+
|
78
|
+
#--
|
79
|
+
# this is implemented separate from sequence length because seq length
|
80
|
+
# uses Archive which doesn't preserve carriage returns and newlines.
|
81
|
+
#++
|
82
|
+
# MD5 hex digest of the raw bytes of the database file.
#
# dbfile - path to the file
#
# Digest::MD5.file reads the file in binary mode, so carriage returns and
# newlines are hashed exactly as stored on disk (a text-mode read may
# translate line endings on some platforms and change the checksum).
#
# Returns the digest as a lowercase hex String.
def self.db_md5sum(dbfile)
  Digest::MD5.file(dbfile).hexdigest
end
|
92
|
+
|
93
|
+
# assumes the file exists and is readable
|
94
|
+
# returns [DBSeqLength, DBLocusCount, DBMD5Sum]
|
95
|
+
# Gathers the database figures reported in an SQT header: the pair from
# db_seq_length_and_locus_count with the MD5 digest appended.
def self.db_info(dbfile)
  info = self.db_seq_length_and_locus_count(dbfile)
  md5 = self.db_md5sum(dbfile)
  info.push(md5)
end
|
99
|
+
|
100
|
+
# The class used to represent protein hits for this search type.
def protein_class
  Mspire::Sequest::Sqt::Locus
end
|
103
|
+
|
104
|
+
# opts =
|
105
|
+
# :percolator_results => false | true (default false)
|
106
|
+
# :link_protein_hits => true | false (default true)
|
107
|
+
# filename - optional path to an SQT file; when given it is read at once
#            through from_file
# opts     - passed to from_file unchanged
def initialize(filename=nil, opts={})
  from_file(filename, opts) if filename
end
|
113
|
+
|
114
|
+
# Two-argument lambda that builds a fresh Locus from an existing protein's
# locus and description plus the given peptide list.
NEW_PROT = lambda { |protein, peptide_hits|
  Mspire::Sequest::Sqt::Locus.new(protein.locus, protein.description, peptide_hits)
}
|
117
|
+
|
118
|
+
# if the file contains a header key matching /^Percolator v/ then the results
|
119
|
+
# will be interpreted as percolator results regardless of the value
|
120
|
+
# passed in.
|
121
|
+
# Reads an SQT file, setting @base_name, @header, @spectra, @peptides and
# @percolator_results.
#
# filename - path to the file; backslashes are treated as path separators
#            when deriving @base_name, and the extension is dropped
# opts     - :percolator_results (default false), :link_protein_hits
#            (default true)
def from_file(filename, opts={})
  settings = {:percolator_results=>false, :link_protein_hits => true}.merge(opts)
  @percolator_results = settings[:percolator_results]
  forward_slashed = filename.gsub('\\','/')
  @base_name = File.basename(forward_slashed).sub(/\.\w+$/, '')
  File.open(filename) do |handle|
    @header = Mspire::Sequest::Sqt::Header.new.from_handle(handle)
    # a percolator header key overrides whatever the caller asked for
    @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
    (@spectra, @peptides) = Mspire::Sequest::Sqt::Spectrum.spectra_from_handle(handle, @base_name, @percolator_results)
  end
end
|
133
|
+
|
134
|
+
|
135
|
+
# Inherits from hash, so all header stuff can be accessed by key. Multiline
|
136
|
+
# values will be pushed into an array.
|
137
|
+
# All header values are stored as (newline-removed) strings!
|
138
|
+
# SQT header: a Hash keyed by the header field name (e.g. 'Database').
# Every value is a newline-stripped String, or an Array of such Strings
# for repeated keys. Each entry of HeaderKeys is also exposed through a
# snake_case accessor (e.g. #database, #dynamic_mods).
class Header < Hash
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }

  KeysToAtts = HeaderKeys.invert

  HeaderKeys.keys.each do |ky|
    attr_accessor ky
  end

  # Consumes the leading run of header ('H') lines from fh and parses them.
  # The handle is left positioned at the first non-header line (or at EOF).
  #
  # Returns self.
  def from_handle(fh)
    Arrayed.each do |ky|
      self[ky] = []
    end
    pos = fh.pos
    lines = []
    loop do
      line = fh.gets
      if line && (line[0,1] == Leader)
        lines << line
      else # reset the fh.pos and we're done
        fh.pos = pos
        break
      end
      pos = fh.pos
    end
    from_lines(lines)
  end

  # Parses tab-delimited header lines ("H<TAB>key<TAB>value...") into the
  # hash and copies every known key onto its accessor. The given strings
  # are not modified. Lines that carry no key are skipped.
  #
  # Returns self.
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |line|
      (ky, *rest) = line.chomp.split(Mspire::Sequest::Sqt::Delimiter)[1..-1]
      next if ky.nil?
      # just in case they have any tabs in their field
      value = rest.join(Mspire::Sequest::Sqt::Delimiter)
      if Arrayed.include?(ky)
        # the array may not exist yet when from_lines is called directly
        (self[ky] ||= []) << value
      elsif self.key? ky  # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else  # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky, methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
# all are cast as expected (total_intensity is a float)
|
224
|
+
# mh = observed mh
|
225
|
+
Mspire::Sequest::Sqt::Spectrum = Struct.new(* %w(first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches).map(&:to_sym) )
|
226
|
+
|
227
|
+
# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches
|
228
|
+
|
229
|
+
class Mspire::Sequest::Sqt::Spectrum
  Leader = 'S'

  # Reads spectrum ('S'), match ('M') and locus ('L') lines from fh until
  # EOF. Assumes the first line starts with an 'S'.
  #
  # fh                 - IO positioned after the header
  # base_name          - stored on every match as its base_name
  # percolator_results - when true, matches are Match::Percolator objects
  #
  # Each match is added exactly once to its spectrum's matches and once to
  # the flat peptide list; loci are attached to the most recent match.
  #
  # Returns [spectra, peptides].
  def self.spectra_from_handle(fh, base_name, percolator_results=false)
    peptides = []
    spectra = []
    spectrum = nil
    matches = nil
    loci = nil
    match_klass = if percolator_results
                    Mspire::Sequest::Sqt::Match::Percolator
                  else
                    Mspire::Sequest::Sqt::Match
                  end

    while line = fh.gets
      case line[0,1]
      when Mspire::Sequest::Sqt::Spectrum::Leader
        spectrum = Mspire::Sequest::Sqt::Spectrum.new.from_line( line )
        spectra << spectrum
        matches = []
        spectrum.matches = matches
      when Mspire::Sequest::Sqt::Match::Leader
        match = match_klass.new.from_line( line )
        # copy the identifying fields of the parent spectrum onto the match
        match.first_scan = spectrum.first_scan
        match.last_scan = spectrum.last_scan
        match.charge = spectrum.charge
        match.base_name = base_name
        loci = []
        match.loci = loci
        matches << match
        peptides << match
      when Mspire::Sequest::Sqt::Locus::Leader
        loci << Mspire::Sequest::Sqt::Locus.from_line( line )
      end
    end
    # set the deltacn:
    set_deltacn(spectra)
    [spectra, peptides]
  end

  # Sets the adjusted deltacn of every match: each match receives the
  # deltacn_orig of the match that follows it within the same spectrum, and
  # the last match of a spectrum receives 1.1.
  #
  # Returns spectra.
  def self.set_deltacn(spectra)
    spectra.each do |spec|
      matches = spec.matches
      next if matches.empty?
      matches.each_cons(2) do |current, following|
        current.deltacn = following.deltacn_orig
      end
      matches.last.deltacn = 1.1
    end
    spectra
  end

  # Fills this spectrum from a tab-delimited 'S' line and resets matches to
  # an empty array. The given string is not modified.
  #
  # Returns self.
  def from_line(line)
    ar = line.chomp.split(Mspire::Sequest::Sqt::Delimiter)
    self[0] = ar[1].to_i
    self[1] = ar[2].to_i
    self[2] = ar[3].to_i
    self[3] = ar[4].to_f
    self[4] = ar[5]
    self[5] = ar[6].to_f
    self[6] = ar[7].to_f
    self[7] = ar[8].to_f
    self[8] = ar[9].to_i
    self[9] = []
    self
  end
end
|
306
|
+
|
307
|
+
# Sqt format uses only indices 0 - 9
|
308
|
+
Mspire::Sequest::Sqt::Match = Struct.new( *%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci].map(&:to_sym) )
|
309
|
+
|
310
|
+
# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci
|
311
|
+
|
312
|
+
# rxcorr = rank by xcorr
|
313
|
+
# rsp = rank by sp
|
314
|
+
# NOTE:
|
315
|
+
# deltacn_orig
|
316
|
+
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
|
317
|
+
# give the last one 1.1)
|
318
|
+
class Mspire::Sequest::Sqt::Match
  Leader = 'M'

  # same as 'loci'
  def proteins
    loci
  end

  # Fills the SQT-defined members (indices 0-9) from a tab-delimited 'M'
  # line, then derives aaseq from the sequence. The line is chomped in
  # place.
  #
  # Returns self.
  def from_line(line)
    line.chomp!
    fields = line.split(Mspire::Sequest::Sqt::Delimiter)
    self.rxcorr = fields[1].to_i
    self.rsp = fields[2].to_i
    self.mh = fields[3].to_f
    self.deltacn_orig = fields[4].to_f
    self.xcorr = fields[5].to_f
    self.sp = fields[6].to_f
    self.ions_matched = fields[7].to_i
    self.ions_total = fields[8].to_i
    self.sequence = fields[9]
    self.manual_validation_status = fields[10]
    self.aaseq = Mspire::Ident::Peptide.sequence_to_aaseq(sequence)
    self
  end
end
|
343
|
+
|
344
|
+
|
345
|
+
# A match read from percolator output. The xcorr slot (index 4) holds the
# percolator score and the sp slot (index 5) holds the negated q-value.
class Mspire::Sequest::Sqt::Match::Percolator < Mspire::Sequest::Sqt::Match
  # we will keep access to these old terms since we can then access routines
  # that sort on xcorr...
  #undef_method :xcorr
  #undef_method :xcorr=
  #undef_method :sp
  #undef_method :sp=

  def percolator_score
    xcorr
  end

  def percolator_score=(score)
    self.xcorr = score
  end

  def negative_q_value
    sp
  end

  def negative_q_value=(arg)
    self.sp = arg
  end

  def q_value
    -sp
  end

  # for compatibility with scripts that want this guy
  def probability
    -sp
  end
end
|
373
|
+
|
374
|
+
Mspire::Sequest::Sqt::Locus = Struct.new( :locus, :description )
|
375
|
+
|
376
|
+
class Mspire::Sequest::Sqt::Locus
  Leader = 'L'

  def first_entry ; self[0] end
  def reference ; self[0] end
  def id ; self[0] end

  # peptides is accepted so the object can be built with the same three
  # arguments NEW_PROT passes; it is not stored.
  def initialize(locus=nil, description=nil, peptides=[])
    super(locus, description)
  end

  # Builds a Locus from a tab-delimited 'L' line
  # ("L<TAB>locus<TAB>description"). The leading 'L' field is dropped so
  # that locus and description land in the right members. Tabs inside the
  # description are kept. The given string is not modified.
  #
  # Returns a new Locus object.
  def self.from_line(line)
    _leader, locus, *rest = line.chomp.split(Mspire::Sequest::Sqt::Delimiter)
    description = rest.empty? ? nil : rest.join(Mspire::Sequest::Sqt::Delimiter)
    self.new(locus, description)
  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Mspire ; end
module Mspire::Ident ; end

class Mspire::Ident::Pepxml
  class SearchHit
    # The sequest-specific scores attached to a search hit.
    Sequest = Struct.new(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank) do

      # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
      def self.split_ions(ions)
        ions.split("/").map(&:to_i)
      end

      # Emits one search_score call on builder per member, in member order,
      # with :name set to the member symbol and :value to its value.
      # Returns nil.
      def to_xml(builder)
        each_pair do |score_name, score_value|
          builder.search_score(:name => score_name, :value => score_value)
        end
        nil
      end
    end
  end
end
|
21
|
+
|