mspire 0.3.1 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -2
- data/bin/bioworks_to_pepxml.rb +15 -3
- data/bin/ms_to_lmat.rb +2 -1
- data/bin/sqt_group.rb +26 -0
- data/changelog.txt +36 -0
- data/lib/ms/msrun.rb +3 -1
- data/lib/ms/parser/mzdata/dom.rb +14 -14
- data/lib/ms/scan.rb +3 -3
- data/lib/mspire.rb +1 -1
- data/lib/sample_enzyme.rb +39 -0
- data/lib/spec_id.rb +18 -0
- data/lib/spec_id/aa_freqs.rb +6 -9
- data/lib/spec_id/digestor.rb +16 -17
- data/lib/spec_id/mass.rb +63 -1
- data/lib/spec_id/parser/proph.rb +101 -2
- data/lib/spec_id/precision/filter.rb +3 -2
- data/lib/spec_id/precision/filter/cmdline.rb +3 -1
- data/lib/spec_id/precision/filter/output.rb +1 -0
- data/lib/spec_id/precision/prob.rb +88 -21
- data/lib/spec_id/precision/prob/cmdline.rb +28 -16
- data/lib/spec_id/precision/prob/output.rb +8 -2
- data/lib/spec_id/proph/pep_summary.rb +25 -12
- data/lib/spec_id/sequest.rb +28 -0
- data/lib/spec_id/sequest/pepxml.rb +142 -197
- data/lib/spec_id/sqt.rb +349 -0
- data/lib/spec_id/srf.rb +33 -23
- data/lib/validator.rb +40 -57
- data/lib/validator/aa.rb +3 -90
- data/lib/validator/aa_est.rb +112 -0
- data/lib/validator/cmdline.rb +163 -31
- data/lib/validator/decoy.rb +15 -7
- data/lib/validator/digestion_based.rb +5 -4
- data/lib/validator/q_value.rb +32 -0
- data/script/peps_per_bin.rb +67 -0
- data/script/sqt_to_meta.rb +24 -0
- data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
- data/specs/bin/fasta_shaker_spec.rb +2 -2
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
- data/specs/bin/filter_and_validate_spec.rb +25 -6
- data/specs/bin/ms_to_lmat_spec.rb +2 -2
- data/specs/bin/prob_validate_spec.rb +5 -3
- data/specs/sample_enzyme_spec.rb +86 -1
- data/specs/spec_helper.rb +11 -9
- data/specs/spec_id/bioworks_spec.rb +2 -1
- data/specs/spec_id/precision/filter_spec.rb +5 -5
- data/specs/spec_id/precision/prob_spec.rb +0 -67
- data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
- data/specs/spec_id/protein_summary_spec.rb +4 -4
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
- data/specs/spec_id/sequest_spec.rb +38 -0
- data/specs/spec_id/sqt_spec.rb +111 -3
- data/specs/spec_id_spec.rb +2 -0
- data/specs/transmem/phobius_spec.rb +3 -1
- data/specs/transmem/toppred_spec.rb +1 -1
- data/specs/validator/aa_est_spec.rb +66 -0
- data/specs/validator/aa_spec.rb +1 -68
- data/specs/validator/background_spec.rb +2 -0
- data/specs/validator/bias_spec.rb +3 -27
- data/specs/validator/decoy_spec.rb +2 -2
- data/specs/validator/transmem_spec.rb +2 -1
- data/test_files/small.sqt +87 -0
- metadata +312 -293
data/lib/spec_id/sqt.rb
ADDED
@@ -0,0 +1,349 @@
|
|
1
|
+
require 'spec_id'
|
2
|
+
require 'array_class'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
# A set of SQT files handled as one unit.
#
# Every match read from the files is pooled into peps, and every locus is
# pooled into prots. A single reference hash is shared across all the files
# while reading, so a locus that shows up in several files is represented by
# one object.
class SQTGroup
  include SpecID # inherits prots and peps accessors

  attr_accessor :sqts, :filenames

  # filenames may be:
  #   * a String ending in '.sqg' (meta text file with a list of .sqt files,
  #     one per line); every listed file must exist or the program aborts
  #   * a String naming one .sqt file
  #   * an Array of .sqt filenames
  #   * nil, which gives an empty group
  def initialize(filenames=nil)
    @filenames = filenames
    @prots = []
    @peps = []
    @sqts = []

    ## This is duplicated in SRFGroup (should refactor eventually)
    return unless filenames

    if filenames.is_a?(String)
      if filenames =~ /\.sqg$/
        @filename = filenames.dup
        @filenames = filenames_from_sqg(@filename)
      else
        # A bare String is not enumerable on ruby >= 1.9, so wrap it rather
        # than letting the loop below raise NoMethodError.
        @filenames = [filenames]
      end
    end

    # shared by every SQT so that loci are created only once
    global_ref_hash = {}
    @filenames.each do |file|
      @sqts << SQT.new(file, @peps, global_ref_hash)
    end

    @prots = global_ref_hash.values
  end

  # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  # Writes one filename per line to sqg_filename and returns sqg_filename.
  # If a file exists, its name is expanded to the full path, otherwise it is
  # written just as given. With no filenames an empty file is written.
  def to_sqg(sqg_filename='bioworks.sqg')
    File.open(sqg_filename, 'w') do |out|
      Array(@filenames).each do |sqt_file|
        out.puts(File.exist?(sqt_file) ? File.expand_path(sqt_file) : sqt_file)
      end
    end
    sqg_filename
  end

  private

  # Returns the non-blank lines of the .sqg file (newline removed). Prints a
  # message and aborts on the first listed file that does not exist.
  def filenames_from_sqg(sqg_filename)
    listed = IO.readlines(sqg_filename).grep(/\w/).map {|v| v.chomp }
    listed.each do |file|
      next if File.exist?(file)
      puts "File: #{file} in #{sqg_filename} does not exist!"
      puts "Please modify #{sqg_filename} to point to existing files."
      abort
    end
    listed
  end

end
|
60
|
+
|
61
|
+
# One SQT file: its header plus the spectra read after the header.
class SQT
  PercolatorHeaderMatch = /^Percolator v/
  Delimiter = "\t"

  # header             - what SQT::Header#from_handle returned
  # spectra            - what SQT::Spectrum.spectra_from_handle returned
  # base_name          - the filename without directory or extension
  # percolator_results - boolean
  attr_accessor :header, :spectra, :base_name, :percolator_results

  # Reads filename right away when one is given; see from_file for peps and
  # global_ref_hash.
  def initialize(filename=nil, peps=[], global_ref_hash={})
    from_file(filename, peps, global_ref_hash) if filename
  end

  # Reads the header and then the spectra from filename. peps and
  # global_ref_hash are handed on to SQT::Spectrum.spectra_from_handle.
  # If any header key matches PercolatorHeaderMatch (/^Percolator v/) the
  # results are interpreted as percolator results, whatever the
  # percolator_results argument says.
  def from_file(filename, peps=[], global_ref_hash={}, percolator_results=false)
    @percolator_results = percolator_results
    # backslashes are turned into slashes so windows style paths also work
    forward_slashed = filename.gsub('\\','/')
    @base_name = File.basename(forward_slashed).sub(/\.\w+$/, '')
    File.open(filename) do |io|
      @header = SQT::Header.new.from_handle(io)
      @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
      @spectra = SQT::Spectrum.spectra_from_handle(io, @base_name, peps, global_ref_hash, @percolator_results)
    end
  end

end
|
91
|
+
|
92
|
+
# Inherits from hash, so all header stuff can be accessed by key. Multiline
# values will be pushed into an array.
# All header values are stored as (newline-removed) strings!
#
# The keys listed in HeaderKeys are also available through attribute
# accessors (e.g. header.database); these are filled in by from_lines.
class SQT::Header < Hash
  # first character of every header line
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  # accessor name => header key as it appears in the file
  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }

  # header key => accessor name
  KeysToAtts = HeaderKeys.invert

  HeaderKeys.keys.each do |ky|
    attr_accessor ky
  end

  # Reads consecutive lines starting with Leader from fh and parses them with
  # from_lines. The handle is left positioned at the start of the first line
  # that is not a header line (or where reading stopped at end of file).
  # returns self
  def from_handle(fh)
    Arrayed.each do |ky|
      self[ky] = []
    end
    pos = fh.pos
    lines = []
    loop do
      line = fh.gets
      if line && (line[0,1] == Leader)
        lines << line
      else # reset the fh.pos and we're done
        fh.pos = pos
        break
      end
      pos = fh.pos
    end
    from_lines(lines)
  end

  # Parses header lines of the form "H<tab>key<tab>value". The given strings
  # are not modified. Lines without a key are ignored.
  # returns self
  def from_lines(array_of_header_lines)
    # so this works (and the arrayed accessors are arrays) even when called
    # without from_handle
    Arrayed.each do |ky|
      self[ky] ||= []
    end
    array_of_header_lines.each do |raw_line|
      (ky, *rest) = raw_line.chomp.split(SQT::Delimiter)[1..-1]
      next if ky.nil?
      # just in case they have any tabs in their field
      value = rest.join(SQT::Delimiter)
      if Arrayed.include?(ky)
        self[ky] << value
      elsif self.key? ky  # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else  # normal
        self[ky] = value
      end
    end
    # mirror the known keys into their accessors
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
|
176
|
+
|
177
|
+
# all are cast as expected (total_intensity is a float)
# mh = observed mh
SQT::Spectrum = ArrayClass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])

# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches

class SQT::Spectrum
  # first character of a spectrum line
  Leader = 'S'

  # Reads 'S', 'M' and 'L' lines from fh until end of file and returns the
  # array of spectra. Lines starting with anything else are skipped.
  #
  # Each match is added once to the matches of the spectrum above it and once
  # to peps; it gets the first_scan, last_scan and charge of its spectrum and
  # the given base_name. Each locus is looked up in (or added to)
  # global_ref_hash by its name, so the same locus object is shared by every
  # match that lists it.
  #
  # Matches are SQT::Match::Percolator objects if percolator_results,
  # otherwise SQT::Match objects.
  #
  # assumes the first line starts with an 'S'
  def self.spectra_from_handle(fh, base_name, peps=[], global_ref_hash={}, percolator_results=false)
    spectra = []
    match_klass = percolator_results ? SQT::Match::Percolator : SQT::Match

    # spectrum, matches, match and loci carry over between lines: an 'M'
    # line belongs to the last 'S' line and an 'L' line to the last 'M' line
    while line = fh.gets
      case line[0,1]
      when SQT::Spectrum::Leader
        spectrum = SQT::Spectrum.new.from_line( line )
        spectra << spectrum
        matches = []
        spectrum.matches = matches
      when SQT::Match::Leader
        match = match_klass.new.from_line( line )
        match[10,3] = spectrum[0,3]
        match[15] = base_name
        peps << match
        loci = []
        match.loci = loci
        matches << match
      when SQT::Locus::Leader
        line.chomp!
        key = line.split(SQT::Delimiter)[1]
        unless global_ref_hash.key?(key)
          new_locus = SQT::Locus.new.from_line( line )
          new_locus.peps = []
          global_ref_hash[key] = new_locus
        end
        locus = global_ref_hash[key]
        locus.peps << match
        loci << locus
      end
    end
    # set the deltacn:
    set_deltacn(spectra)
    spectra
  end

  # Within each spectrum, every match takes the deltacn_orig of the match
  # after it as its deltacn and the last match gets 1.1.
  # returns spectra
  def self.set_deltacn(spectra)
    spectra.each do |spec|
      matches = spec.matches
      next if matches.size == 0
      matches.each_cons(2) do |current, following|
        current.deltacn = following.deltacn_orig
      end
      matches[-1].deltacn = 1.1
    end
    spectra
  end

  # Fills in indices 0-8 from a tab delimited 'S' line (the line is chomped
  # in place) and sets matches to an empty array.
  # returns self
  def from_line(line)
    line.chomp!
    ar = line.split(SQT::Delimiter)
    self[0] = ar[1].to_i
    self[1] = ar[2].to_i
    self[2] = ar[3].to_i
    self[3] = ar[4].to_f
    self[4] = ar[5]
    self[5] = ar[6].to_f
    self[6] = ar[7].to_f
    self[7] = ar[8].to_f
    self[8] = ar[9].to_i
    self[9] = []
    self
  end
end
|
263
|
+
|
264
|
+
# Only indices 0 - 9 come from the SQT 'M' line itself; the rest are filled in
# afterwards.
SQT::Match = ArrayClass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])

# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci

# rxcorr = rank by xcorr
# rsp = rank by sp
# NOTE on the two deltacn values:
#   deltacn_orig is the value as read from the file
#   deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
#   give the last one 1.1)
class SQT::Match
  include SpecID::Pep
  # first character of a match line
  Leader = 'M'

  # same as 'loci'
  def prots ; self[16] end

  # Fills in indices 0-9 from a tab delimited 'M' line (the line is chomped
  # in place) and sets aaseq (index 14) from the sequence.
  # returns self
  def from_line(line)
    line.chomp!
    fields = line.split(SQT::Delimiter)
    # the field for index i sits one position later in the line (after 'M')
    (0..1).each {|i| self[i] = fields[i+1].to_i }  # rxcorr, rsp
    (2..5).each {|i| self[i] = fields[i+1].to_f }  # mh, deltacn_orig, xcorr, sp
    (6..7).each {|i| self[i] = fields[i+1].to_i }  # ions_matched, ions_total
    (8..9).each {|i| self[i] = fields[i+1] }       # sequence, manual_validation_status
    self[14] = SpecID::Pep.sequence_to_aaseq(self[8])
    self
  end
end
|
301
|
+
|
302
|
+
|
303
|
+
# A match read from percolator results. Index 4 (xcorr in a plain match) is
# read as the percolator score and index 5 (sp in a plain match) as the
# negative q-value.
class SQT::Match::Percolator < SQT::Match
  # The xcorr and sp accessors are deliberately left in place (not undef'd)
  # so that routines which sort on xcorr can still be used.

  def percolator_score ; self[4] end
  def percolator_score=(score) ; self[4] = score end

  def negative_q_value ; self[5] end
  def negative_q_value=(arg) ; self[5] = arg end

  # the stored negative q-value with its sign flipped
  def q_value ; -self[5] end

  # for compatibility with scripts that want this guy (same as q_value)
  def probability ; -self[5] end
end
|
331
|
+
|
332
|
+
# 0=locus 1=description 2=peps
SQT::Locus = ArrayClass.new(%w[locus description peps])

class SQT::Locus
  include SpecID::Prot
  # first character of a locus line
  Leader = 'L'

  # both are other names for the locus (index 0)
  def first_entry
    self[0]
  end

  def reference
    self[0]
  end

  # Takes locus and description from a tab delimited 'L' line (the line is
  # chomped in place).
  # returns self
  def from_line(line)
    line.chomp!
    _leader, name, descr = line.split(SQT::Delimiter)
    self[0] = name
    self[1] = descr
    self
  end

end
|
data/lib/spec_id/srf.rb
CHANGED
@@ -3,7 +3,6 @@ require 'spec_id/sequest'
|
|
3
3
|
require 'fasta'
|
4
4
|
require 'mspire'
|
5
5
|
require 'set'
|
6
|
-
require 'fasta'
|
7
6
|
|
8
7
|
module BinaryReader
|
9
8
|
Null_char = "\0"[0] ## TODO: change for ruby 1.9 or 2.0
|
@@ -40,6 +39,7 @@ class SRFGroup
|
|
40
39
|
@prots = []
|
41
40
|
@srfs = []
|
42
41
|
|
42
|
+
# This is essentially duplicated in SQTGroup (should refactor eventually)
|
43
43
|
global_ref_hash = {}
|
44
44
|
if filenames
|
45
45
|
if filenames.is_a?(String) && filenames =~ /\.srg$/
|
@@ -102,6 +102,7 @@ class SRFGroup
|
|
102
102
|
end
|
103
103
|
|
104
104
|
@srfs.each do |srf|
|
105
|
+
srf.filtered_by_precursor_mass_tolerance = true
|
105
106
|
srf.out_files.each do |out_file|
|
106
107
|
hits = out_file.hits
|
107
108
|
before = hits.size
|
@@ -129,7 +130,7 @@ class SRFGroup
|
|
129
130
|
do_not_keep
|
130
131
|
end
|
131
132
|
if hits.size != before
|
132
|
-
SRF::OUT::Pep.
|
133
|
+
SRF::OUT::Pep.update_deltacns_from_xcorr(hits)
|
133
134
|
out_file.num_hits = hits.size
|
134
135
|
end
|
135
136
|
end
|
@@ -157,9 +158,9 @@ end
|
|
157
158
|
|
158
159
|
class SRF
|
159
160
|
|
160
|
-
# a string 3.3 or 3.2
|
161
|
+
# a string 3.5, 3.3 or 3.2
|
161
162
|
attr_accessor :version
|
162
|
-
|
163
|
+
|
163
164
|
attr_accessor :header
|
164
165
|
attr_accessor :dta_files
|
165
166
|
attr_accessor :out_files
|
@@ -170,8 +171,8 @@ class SRF
|
|
170
171
|
attr_accessor :base_name
|
171
172
|
# this is the global peptides array
|
172
173
|
attr_accessor :peps
|
173
|
-
|
174
|
-
attr_accessor :
|
174
|
+
|
175
|
+
attr_accessor :filtered_by_precursor_mass_tolerance
|
175
176
|
|
176
177
|
def dta_start_byte
|
177
178
|
case @version
|
@@ -321,7 +322,6 @@ class SRF
|
|
321
322
|
# * Number of sequences matching this precursor ion
|
322
323
|
#########################################
|
323
324
|
|
324
|
-
|
325
325
|
manual_validation_status = 'U'
|
326
326
|
self.out_files.zip(dta_files) do |out_file, dta_file|
|
327
327
|
# don't have the time to process (using 0.0 like bioworks 3.2)
|
@@ -337,17 +337,17 @@ class SRF
|
|
337
337
|
out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
|
338
338
|
out_file.hits.each_with_index do |hit,index|
|
339
339
|
hit_mh = hit.mh
|
340
|
-
|
340
|
+
hit_deltacn_orig_updated = hit.deltacn_orig_updated
|
341
341
|
hit_xcorr = hit.xcorr
|
342
342
|
hit_sp = hit.sp
|
343
343
|
if opt[:round]
|
344
344
|
hit_mh = round(hit_mh, mh_dp)
|
345
|
-
|
345
|
+
hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
|
346
346
|
hit_xcorr = round(hit_xcorr, xcorr_dp)
|
347
347
|
hit_sp = round(hit_sp, sp_dp)
|
348
348
|
end
|
349
349
|
# note that the rank is determined by the order..
|
350
|
-
out.puts ['M', index+1, hit.rsp, hit_mh,
|
350
|
+
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
351
351
|
hit.prots.each do |prot|
|
352
352
|
out.puts ['L', prot.first_entry].join("\t")
|
353
353
|
end
|
@@ -647,10 +647,17 @@ class SRF::OUT
|
|
647
647
|
end
|
648
648
|
|
649
649
|
|
650
|
-
#
|
651
|
-
#
|
652
|
-
#
|
653
|
-
#
|
650
|
+
# deltacn_orig - the one that sequest originally reports (top hit gets 0.0)
|
651
|
+
# deltacn - modified to be that of the next best hit (by xcorr) and the last
|
652
|
+
# hit takes 1.1. This is what is called deltacn by bioworks and pepprophet
|
653
|
+
# (at least for the first few years). If filtering occurs, it will be
|
654
|
+
# updated.
|
655
|
+
# deltacn_orig_updated - the latest updated value of deltacn.
|
656
|
+
# Originally, this will be equal to deltacn_orig. After filtering, this will
|
657
|
+
# be recalculated. To know if this will be different from deltacn_orig, query
|
658
|
+
# match.srf.filtered_by_precursor_mass_tolerance. If this is changed, then
|
659
|
+
# deltacn should also be changed to reflect it.
|
660
|
+
# mh - the theoretical mass + h
|
654
661
|
# prots are created as SRF prot objects with a reference and linked to their
|
655
662
|
# peptides (from global hash by reference)
|
656
663
|
# ppm = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]
|
@@ -659,9 +666,9 @@ end
|
|
659
666
|
# the first one listed
|
660
667
|
# srf = the srf object this scan came from
|
661
668
|
|
662
|
-
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn) )
|
669
|
+
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
|
663
670
|
|
664
|
-
# 0=mh 1=
|
671
|
+
# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
|
665
672
|
|
666
673
|
class SRF::OUT::Pep
|
667
674
|
include SpecID::Pep
|
@@ -674,16 +681,18 @@ class SRF::OUT::Pep
|
|
674
681
|
ar[-1].deltacn = 1.1
|
675
682
|
end
|
676
683
|
|
677
|
-
#
|
678
|
-
#
|
679
|
-
def self.
|
684
|
+
# (assumes sorted)
|
685
|
+
# recalculates deltacn from xcorrs and sets deltacn_orig_updated and deltacn
|
686
|
+
def self.update_deltacns_from_xcorr(ar)
|
680
687
|
if ar.size > 0
|
681
688
|
top_score = ar.first[3]
|
682
689
|
other_scores = (1...(ar.size)).to_a.map do |i|
|
683
|
-
|
690
|
+
1.0 - (ar[i][3]/top_score)
|
684
691
|
end
|
692
|
+
ar.first[20] = 0.0
|
685
693
|
(0...(ar.size-1)).each do |i|
|
686
|
-
ar[i][19] = other_scores[i]
|
694
|
+
ar[i][19] = other_scores[i] # deltacn
|
695
|
+
ar[i+1][20] = other_scores[i] # deltacn_orig_updated
|
687
696
|
end
|
688
697
|
ar.last[19] = 1.1
|
689
698
|
end
|
@@ -753,6 +762,9 @@ class SRF::OUT::Pep
|
|
753
762
|
|
754
763
|
self[0,10] = st.unpack(unpack)
|
755
764
|
|
765
|
+
# set deltacn_orig_updated
|
766
|
+
self[20] = self[1]
|
767
|
+
|
756
768
|
# we are slicing the reference to 38 chars to be the same length as
|
757
769
|
# duplicate references
|
758
770
|
self[10] = [new_protein(self[10][0,38], self, global_ref_hash)]
|
@@ -764,8 +776,6 @@ class SRF::OUT::Pep
|
|
764
776
|
self
|
765
777
|
end
|
766
778
|
|
767
|
-
|
768
|
-
|
769
779
|
def new_protein(reference, peptide, global_ref_hash)
|
770
780
|
if global_ref_hash.key? reference
|
771
781
|
global_ref_hash[reference].peps << peptide
|