mspire 0.3.1 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/bin/bioworks_to_pepxml.rb +15 -3
- data/bin/ms_to_lmat.rb +2 -1
- data/bin/sqt_group.rb +26 -0
- data/changelog.txt +36 -0
- data/lib/ms/msrun.rb +3 -1
- data/lib/ms/parser/mzdata/dom.rb +14 -14
- data/lib/ms/scan.rb +3 -3
- data/lib/mspire.rb +1 -1
- data/lib/sample_enzyme.rb +39 -0
- data/lib/spec_id.rb +18 -0
- data/lib/spec_id/aa_freqs.rb +6 -9
- data/lib/spec_id/digestor.rb +16 -17
- data/lib/spec_id/mass.rb +63 -1
- data/lib/spec_id/parser/proph.rb +101 -2
- data/lib/spec_id/precision/filter.rb +3 -2
- data/lib/spec_id/precision/filter/cmdline.rb +3 -1
- data/lib/spec_id/precision/filter/output.rb +1 -0
- data/lib/spec_id/precision/prob.rb +88 -21
- data/lib/spec_id/precision/prob/cmdline.rb +28 -16
- data/lib/spec_id/precision/prob/output.rb +8 -2
- data/lib/spec_id/proph/pep_summary.rb +25 -12
- data/lib/spec_id/sequest.rb +28 -0
- data/lib/spec_id/sequest/pepxml.rb +142 -197
- data/lib/spec_id/sqt.rb +349 -0
- data/lib/spec_id/srf.rb +33 -23
- data/lib/validator.rb +40 -57
- data/lib/validator/aa.rb +3 -90
- data/lib/validator/aa_est.rb +112 -0
- data/lib/validator/cmdline.rb +163 -31
- data/lib/validator/decoy.rb +15 -7
- data/lib/validator/digestion_based.rb +5 -4
- data/lib/validator/q_value.rb +32 -0
- data/script/peps_per_bin.rb +67 -0
- data/script/sqt_to_meta.rb +24 -0
- data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
- data/specs/bin/fasta_shaker_spec.rb +2 -2
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
- data/specs/bin/filter_and_validate_spec.rb +25 -6
- data/specs/bin/ms_to_lmat_spec.rb +2 -2
- data/specs/bin/prob_validate_spec.rb +5 -3
- data/specs/sample_enzyme_spec.rb +86 -1
- data/specs/spec_helper.rb +11 -9
- data/specs/spec_id/bioworks_spec.rb +2 -1
- data/specs/spec_id/precision/filter_spec.rb +5 -5
- data/specs/spec_id/precision/prob_spec.rb +0 -67
- data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
- data/specs/spec_id/protein_summary_spec.rb +4 -4
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
- data/specs/spec_id/sequest_spec.rb +38 -0
- data/specs/spec_id/sqt_spec.rb +111 -3
- data/specs/spec_id_spec.rb +2 -0
- data/specs/transmem/phobius_spec.rb +3 -1
- data/specs/transmem/toppred_spec.rb +1 -1
- data/specs/validator/aa_est_spec.rb +66 -0
- data/specs/validator/aa_spec.rb +1 -68
- data/specs/validator/background_spec.rb +2 -0
- data/specs/validator/bias_spec.rb +3 -27
- data/specs/validator/decoy_spec.rb +2 -2
- data/specs/validator/transmem_spec.rb +2 -1
- data/test_files/small.sqt +87 -0
- metadata +312 -293
data/lib/spec_id/sqt.rb
ADDED
@@ -0,0 +1,349 @@
|
|
1
|
+
require 'spec_id'
|
2
|
+
require 'array_class'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
# A group of SQT files. Every file named in +filenames+ is parsed into an SQT
# object; all of them are handed the same peps array and the same hash of loci
# so that the group can expose group-wide peps and prots.
class SQTGroup
  include SpecID  # inherits prots and peps accessors

  attr_accessor :sqts, :filenames

  # filenames may be:
  #   * a String naming a file that ends in '.sqg' (meta text file with a list
  #     of .sqt files, one per line; lines without a word character are
  #     skipped)
  #   * a String naming a single sqt file
  #   * an array of sqt filenames
  #   * nil, which gives an empty group
  # Prints a message and aborts if a file listed in the '.sqg' file does not
  # exist.
  def initialize(filenames=nil)
    @filenames = filenames
    @prots = []
    @peps = []
    @sqts = []

    global_ref_hash = {}
    ## This is duplicated in SRFGroup (should refactor eventually)
    if filenames
      if filenames.is_a?(String)
        if filenames =~ /\.sqg$/
          sqg_filename = filenames.dup
          @filename = sqg_filename
          @filenames = IO.readlines(filenames).grep(/\w/).map {|v| v.chomp }
          @filenames.each do |file|
            if !File.exist? file
              puts "File: #{file} in #{sqg_filename} does not exist!"
              puts "Please modify #{sqg_filename} to point to existing files."
              abort
            end
          end
        else
          # a lone sqt filename: wrap it, since String does not respond to
          # each in ruby >= 1.9
          @filenames = [filenames]
        end
      end
      @filenames.each do |file|
        @sqts << SQT.new(file, @peps, global_ref_hash)
      end

      # every locus seen in any of the files, keyed by its reference
      @prots = global_ref_hash.values
    end
  end

  # NOTE THAT this is copy/paste from srf.rb, should be refactored...
  # writes one sqt filename per line to sqg_filename and returns the filename
  # used
  # if an sqt file exists, its name will be expanded to full path, otherwise
  # just what is given
  # (a group created without filenames writes an empty file)
  def to_sqg(sqg_filename='bioworks.sqg')
    File.open(sqg_filename, 'w') do |out|
      Array(@filenames).each do |sqt_file|
        if File.exist? sqt_file
          out.puts File.expand_path(sqt_file)
        else
          out.puts sqt_file
        end
      end
    end
    sqg_filename
  end

end
|
60
|
+
|
61
|
+
# A single parsed .sqt file: its header, its spectra and the base name of the
# file it was read from.
class SQT
  PercolatorHeaderMatch = /^Percolator v/
  Delimiter = "\t"

  attr_accessor :header, :spectra, :base_name
  # boolean
  attr_accessor :percolator_results

  # parses filename right away when one is given (see from_file)
  def initialize(filename=nil, peps=[], global_ref_hash={})
    from_file(filename, peps, global_ref_hash) if filename
  end

  # reads the header and then the spectra from filename.
  # base_name is the file's basename without its extension (backslashes are
  # treated as path separators).
  # if any header key matches /^Percolator v/ then the results will be
  # interpreted as percolator results, regardless of the percolator_results
  # argument
  def from_file(filename, peps=[], global_ref_hash={}, percolator_results=false)
    @percolator_results = percolator_results
    forward_slashed = filename.gsub('\\','/')
    @base_name = File.basename(forward_slashed).sub(/\.\w+$/, '')
    File.open(filename) do |io|
      @header = SQT::Header.new.from_handle(io)
      @percolator_results = true if @header.keys.any? {|key| key =~ PercolatorHeaderMatch }
      @spectra = SQT::Spectrum.spectra_from_handle(io, @base_name, peps, global_ref_hash, @percolator_results)
    end
  end

end
|
91
|
+
|
92
|
+
# Inherits from hash, so all header stuff can be accessed by key. Multiline
|
93
|
+
# values will be pushed into an array.
|
94
|
+
# All header values are stored as (newline-removed) strings!
|
95
|
+
class SQT::Header < Hash
  # first character of every header line
  Leader = 'H'

  # These will be in arrays no matter what: StaticMod, DynamicMod, Comment
  # Any other keys repeated will be shoved into an array; otherwise a string
  Arrayed = %w(DynamicMod StaticMod Comment).to_set

  # accessor name => header key as written in the file
  HeaderKeys = {
    :sqt_generator => 'SQTGenerator',
    :sqt_generator_version => 'SQTGeneratorVersion',
    :database => 'Database',
    :fragment_masses => 'FragmentMasses',
    :precursor_masses => 'PrecursorMasses',
    :start_time => 'StartTime',
    :db_seq_length => 'DBSeqLength',
    :db_locus_count => 'DBLocusCount',
    :db_md5sum => 'DBMD5Sum',
    :peptide_mass_tolerance => 'Alg-PreMassTol',
    :fragment_ion_tolerance => 'Alg-FragMassTol',
    # nonstandard (mine)
    :peptide_mass_units => 'Alg-PreMassUnits',
    :ion_series => 'Alg-IonSeries',
    :enzyme => 'Alg-Enzyme',
    # nonstandard (mine)
    :ms_model => 'Alg-MSModel',
    :static_mods => 'StaticMod',
    :dynamic_mods => 'DynamicMod',
    :comments => 'Comment'
  }

  # header key as written in the file => accessor name
  KeysToAtts = HeaderKeys.invert

  HeaderKeys.keys.each do |ky|
    attr_accessor ky
  end

  # reads consecutive lines starting with 'H' from fh and parses them with
  # from_lines.  The handle is left positioned at the first non-header line
  # (or at the end of the file).  returns self
  def from_handle(fh)
    Arrayed.each do |ky|
      self[ky] = []
    end
    pos = fh.pos
    lines = []
    loop do
      line = fh.gets
      if line && (line[0,1] == SQT::Header::Leader )
        lines << line
      else # reset the fh.pos and we're done
        fh.pos = pos
        break
      end
      pos = fh.pos
    end
    from_lines(lines)
  end

  # parses the header lines (each is chomped in place) into self and then
  # copies the values of the known keys into their accessors.  returns self
  def from_lines(array_of_header_lines)
    array_of_header_lines.each do |line|
      line.chomp!
      (ky, *rest) = line.split(SQT::Delimiter)[1..-1]
      # a line holding nothing but the leader has no key to store under
      next if ky.nil?
      # just in case they have any tabs in their field
      value = rest.join(SQT::Delimiter)
      if Arrayed.include?(ky)
        # from_lines may be called without from_handle having seeded the array
        (self[ky] ||= []) << value
      elsif self.key? ky  # already exists
        if self[ky].is_a? Array
          self[ky] << value
        else
          self[ky] = [self[ky], value]
        end
      else  # normal
        self[ky] = value
      end
    end
    KeysToAtts.each do |ky,methd|
      self.send("#{methd}=".to_sym, self[ky])
    end
    self
  end

end
|
176
|
+
|
177
|
+
# all are cast as expected (total_intensity is a float)
|
178
|
+
# mh = observed mh
|
179
|
+
SQT::Spectrum = ArrayClass.new(%w[first_scan last_scan charge time_to_process node mh total_intensity lowest_sp num_matched_peptides matches])

# 0=first_scan 1=last_scan 2=charge 3=time_to_process 4=node 5=mh 6=total_intensity 7=lowest_sp 8=num_matched_peptides 9=matches

class SQT::Spectrum
  Leader = 'S'

  # reads fh to the end, building one spectrum per 'S' line, one match per
  # 'M' line (attached to the preceding spectrum and pushed onto peps) and one
  # locus per 'L' line (attached to the preceding match; loci are shared
  # through global_ref_hash, keyed by the second field of the line).
  # assumes the first line starts with an 'S'
  # returns the array of spectra (with deltacn set on every match)
  def self.spectra_from_handle(fh, base_name, peps=[], global_ref_hash={}, percolator_results=false)
    spectra = []

    while line = fh.gets
      case line[0,1]
      when SQT::Spectrum::Leader
        spectrum = SQT::Spectrum.new.from_line( line )
        spectra << spectrum
        matches = []
        spectrum.matches = matches
      when SQT::Match::Leader
        match_klass = if percolator_results
                        SQT::Match::Percolator
                      else
                        SQT::Match
                      end
        match = match_klass.new.from_line( line )
        # copy first_scan, last_scan and charge from the spectrum
        match[10,3] = spectrum[0,3]
        match[15] = base_name
        # each match is recorded exactly once per spectrum
        matches << match
        peps << match
        loci = []
        match.loci = loci
      when SQT::Locus::Leader
        line.chomp!
        key = line.split(SQT::Delimiter)[1]
        locus =
          if global_ref_hash.key?(key)
            global_ref_hash[key]
          else
            new_locus = SQT::Locus.new.from_line( line )
            new_locus.peps = []
            global_ref_hash[key] = new_locus
          end
        locus.peps << match
        loci << locus
      end
    end
    # set the deltacn:
    set_deltacn(spectra)
    spectra
  end

  # within each spectrum every match takes the deltacn_orig of the match that
  # follows it and the last match takes 1.1.  returns spectra
  def self.set_deltacn(spectra)
    spectra.each do |spec|
      matches = spec.matches
      if matches.size > 0

        (0...(matches.size-1)).each do |i|
          matches[i].deltacn = matches[i+1].deltacn_orig
        end
        matches[-1].deltacn = 1.1
      end
    end
    spectra
  end


  # fills indices 0-8 from a tab delimited 'S' line (which is chomped in
  # place) and sets matches (index 9) to a new empty array.  returns self
  def from_line(line)
    line.chomp!
    ar = line.split(SQT::Delimiter)
    self[0] = ar[1].to_i
    self[1] = ar[2].to_i
    self[2] = ar[3].to_i
    self[3] = ar[4].to_f
    self[4] = ar[5]
    self[5] = ar[6].to_f
    self[6] = ar[7].to_f
    self[7] = ar[8].to_f
    self[8] = ar[9].to_i
    self[9] = []
    self
  end
end
|
263
|
+
|
264
|
+
# SQT format uses only indices 0 - 9
|
265
|
+
SQT::Match = ArrayClass.new(%w[rxcorr rsp mh deltacn_orig xcorr sp ions_matched ions_total sequence manual_validation_status first_scan last_scan charge deltacn aaseq base_name loci])

# 0=rxcorr 1=rsp 2=mh 3=deltacn_orig 4=xcorr 5=sp 6=ions_matched 7=ions_total 8=sequence 9=manual_validation_status 10=first_scan 11=last_scan 12=charge 13=deltacn 14=aaseq 15=base_name 16=loci

# rxcorr = rank by xcorr
# rsp = rank by sp
# NOTE:
# deltacn_orig is the value read from the file
# deltacn is the adjusted deltacn (like Bioworks - shift all scores up and
# give the last one 1.1)
class SQT::Match
  include SpecID::Pep
  Leader = 'M'

  # same as 'loci'
  def prots ; self[16] end

  # fills indices 0-9 from a tab delimited 'M' line (chomped in place) and
  # derives aaseq (index 14) from the sequence.  returns self
  def from_line(line)
    line.chomp!
    _leader, rank_xcorr, rank_sp, calc_mh, dcn, xcorr_score, sp_score,
      matched, total, seq, status = line.split(SQT::Delimiter)
    self[0] = rank_xcorr.to_i
    self[1] = rank_sp.to_i
    self[2] = calc_mh.to_f
    self[3] = dcn.to_f
    self[4] = xcorr_score.to_f
    self[5] = sp_score.to_f
    self[6] = matched.to_i
    self[7] = total.to_i
    self[8] = seq
    self[9] = status
    self[14] = SpecID::Pep.sequence_to_aaseq(self[8])
    self
  end
end
|
301
|
+
|
302
|
+
|
303
|
+
# A match read from percolator results: index 4 (xcorr in a plain match) is
# read as the percolator score and index 5 (sp) as the negative q-value.
class SQT::Match::Percolator < SQT::Match
  # the xcorr/sp accessors are deliberately left in place (not undef'd) so
  # that routines that sort on xcorr keep working:
  #undef_method :xcorr
  #undef_method :xcorr=
  #undef_method :sp
  #undef_method :sp=

  def percolator_score ; self[4] end
  def percolator_score=(score) ; self[4] = score end

  def negative_q_value ; self[5] end
  def negative_q_value=(arg) ; self[5] = arg end

  # the stored value with its sign flipped
  def q_value ; -self[5] end

  # for compatibility with scripts that want this guy
  def probability ; -self[5] end
end
|
331
|
+
|
332
|
+
SQT::Locus = ArrayClass.new(%w[locus description peps])

# One 'L' line of an sqt file: 0=locus 1=description 2=peps
class SQT::Locus
  include SpecID::Prot
  Leader = 'L'

  # both names answer with the locus field (index 0)
  def first_entry
    self[0]
  end

  def reference
    self[0]
  end

  # fills locus and description from a tab delimited 'L' line (chomped in
  # place).  returns self
  def from_line(line)
    line.chomp!
    _leader, locus_name, locus_description = line.split(SQT::Delimiter)
    self[0] = locus_name
    self[1] = locus_description
    self
  end

end
|
data/lib/spec_id/srf.rb
CHANGED
@@ -3,7 +3,6 @@ require 'spec_id/sequest'
|
|
3
3
|
require 'fasta'
|
4
4
|
require 'mspire'
|
5
5
|
require 'set'
|
6
|
-
require 'fasta'
|
7
6
|
|
8
7
|
module BinaryReader
|
9
8
|
Null_char = "\0"[0] ## TODO: change for ruby 1.9 or 2.0
|
@@ -40,6 +39,7 @@ class SRFGroup
|
|
40
39
|
@prots = []
|
41
40
|
@srfs = []
|
42
41
|
|
42
|
+
# This is essentially duplicated in SQTGroup (should refactor eventually)
|
43
43
|
global_ref_hash = {}
|
44
44
|
if filenames
|
45
45
|
if filenames.is_a?(String) && filenames =~ /\.srg$/
|
@@ -102,6 +102,7 @@ class SRFGroup
|
|
102
102
|
end
|
103
103
|
|
104
104
|
@srfs.each do |srf|
|
105
|
+
srf.filtered_by_precursor_mass_tolerance = true
|
105
106
|
srf.out_files.each do |out_file|
|
106
107
|
hits = out_file.hits
|
107
108
|
before = hits.size
|
@@ -129,7 +130,7 @@ class SRFGroup
|
|
129
130
|
do_not_keep
|
130
131
|
end
|
131
132
|
if hits.size != before
|
132
|
-
SRF::OUT::Pep.
|
133
|
+
SRF::OUT::Pep.update_deltacns_from_xcorr(hits)
|
133
134
|
out_file.num_hits = hits.size
|
134
135
|
end
|
135
136
|
end
|
@@ -157,9 +158,9 @@ end
|
|
157
158
|
|
158
159
|
class SRF
|
159
160
|
|
160
|
-
# a string 3.3 or 3.2
|
161
|
+
# a string 3.5, 3.3 or 3.2
|
161
162
|
attr_accessor :version
|
162
|
-
|
163
|
+
|
163
164
|
attr_accessor :header
|
164
165
|
attr_accessor :dta_files
|
165
166
|
attr_accessor :out_files
|
@@ -170,8 +171,8 @@ class SRF
|
|
170
171
|
attr_accessor :base_name
|
171
172
|
# this is the global peptides array
|
172
173
|
attr_accessor :peps
|
173
|
-
|
174
|
-
attr_accessor :
|
174
|
+
|
175
|
+
attr_accessor :filtered_by_precursor_mass_tolerance
|
175
176
|
|
176
177
|
def dta_start_byte
|
177
178
|
case @version
|
@@ -321,7 +322,6 @@ class SRF
|
|
321
322
|
# * Number of sequences matching this precursor ion
|
322
323
|
#########################################
|
323
324
|
|
324
|
-
|
325
325
|
manual_validation_status = 'U'
|
326
326
|
self.out_files.zip(dta_files) do |out_file, dta_file|
|
327
327
|
# don't have the time to process (using 0.0 like bioworks 3.2)
|
@@ -337,17 +337,17 @@ class SRF
|
|
337
337
|
out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
|
338
338
|
out_file.hits.each_with_index do |hit,index|
|
339
339
|
hit_mh = hit.mh
|
340
|
-
|
340
|
+
hit_deltacn_orig_updated = hit.deltacn_orig_updated
|
341
341
|
hit_xcorr = hit.xcorr
|
342
342
|
hit_sp = hit.sp
|
343
343
|
if opt[:round]
|
344
344
|
hit_mh = round(hit_mh, mh_dp)
|
345
|
-
|
345
|
+
hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
|
346
346
|
hit_xcorr = round(hit_xcorr, xcorr_dp)
|
347
347
|
hit_sp = round(hit_sp, sp_dp)
|
348
348
|
end
|
349
349
|
# note that the rank is determined by the order..
|
350
|
-
out.puts ['M', index+1, hit.rsp, hit_mh,
|
350
|
+
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
351
351
|
hit.prots.each do |prot|
|
352
352
|
out.puts ['L', prot.first_entry].join("\t")
|
353
353
|
end
|
@@ -647,10 +647,17 @@ class SRF::OUT
|
|
647
647
|
end
|
648
648
|
|
649
649
|
|
650
|
-
#
|
651
|
-
#
|
652
|
-
#
|
653
|
-
#
|
650
|
+
# deltacn_orig - the one that sequest originally reports (top hit gets 0.0)
|
651
|
+
# deltacn - modified to be that of the next best hit (by xcorr) and the last
|
652
|
+
# hit takes 1.1. This is what is called deltacn by bioworks and pepprophet
|
653
|
+
# (at least for the first few years). If filtering occurs, it will be
|
654
|
+
# updated.
|
655
|
+
# deltacn_orig_updated - the latest updated value of deltacn.
|
656
|
+
# Originally, this will be equal to deltacn_orig. After filtering, this will
|
657
|
+
# be recalculated. To know if this will be different from deltacn_orig, query
|
658
|
+
# match.srf.filtered_by_precursor_mass_tolerance. If this is changed, then
|
659
|
+
# deltacn should also be changed to reflect it.
|
660
|
+
# mh - the theoretical mass + h
|
654
661
|
# prots are created as SRF prot objects with a reference and linked to their
|
655
662
|
# peptides (from global hash by reference)
|
656
663
|
# ppm = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]
|
@@ -659,9 +666,9 @@ end
|
|
659
666
|
# the first one listed
|
660
667
|
# srf = the srf object this scan came from
|
661
668
|
|
662
|
-
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn) )
|
669
|
+
SRF::OUT::Pep = ArrayClass.new(%w( mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
|
663
670
|
|
664
|
-
# 0=mh 1=
|
671
|
+
# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
|
665
672
|
|
666
673
|
class SRF::OUT::Pep
|
667
674
|
include SpecID::Pep
|
@@ -674,16 +681,18 @@ class SRF::OUT::Pep
|
|
674
681
|
ar[-1].deltacn = 1.1
|
675
682
|
end
|
676
683
|
|
677
|
-
#
|
678
|
-
#
|
679
|
-
def self.
|
684
|
+
# (assumes sorted)
|
685
|
+
# recalculates deltacn from xcorrs and sets deltacn_orig_updated and deltacn
|
686
|
+
def self.update_deltacns_from_xcorr(ar)
|
680
687
|
if ar.size > 0
|
681
688
|
top_score = ar.first[3]
|
682
689
|
other_scores = (1...(ar.size)).to_a.map do |i|
|
683
|
-
|
690
|
+
1.0 - (ar[i][3]/top_score)
|
684
691
|
end
|
692
|
+
ar.first[20] = 0.0
|
685
693
|
(0...(ar.size-1)).each do |i|
|
686
|
-
ar[i][19] = other_scores[i]
|
694
|
+
ar[i][19] = other_scores[i] # deltacn
|
695
|
+
ar[i+1][20] = other_scores[i] # deltacn_orig_updated
|
687
696
|
end
|
688
697
|
ar.last[19] = 1.1
|
689
698
|
end
|
@@ -753,6 +762,9 @@ class SRF::OUT::Pep
|
|
753
762
|
|
754
763
|
self[0,10] = st.unpack(unpack)
|
755
764
|
|
765
|
+
# set deltacn_orig_updated
|
766
|
+
self[20] = self[1]
|
767
|
+
|
756
768
|
# we are slicing the reference to 38 chars to be the same length as
|
757
769
|
# duplicate references
|
758
770
|
self[10] = [new_protein(self[10][0,38], self, global_ref_hash)]
|
@@ -764,8 +776,6 @@ class SRF::OUT::Pep
|
|
764
776
|
self
|
765
777
|
end
|
766
778
|
|
767
|
-
|
768
|
-
|
769
779
|
def new_protein(reference, peptide, global_ref_hash)
|
770
780
|
if global_ref_hash.key? reference
|
771
781
|
global_ref_hash[reference].peps << peptide
|