ms-sequest 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +14 -0
- data/.gitignore +8 -0
- data/.gitmodules +9 -0
- data/History +8 -0
- data/{MIT-LICENSE → LICENSE} +1 -0
- data/README.rdoc +77 -0
- data/Rakefile +110 -0
- data/VERSION +1 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/bioworks.rb +498 -0
- data/lib/ms/sequest/pepxml.rb +1458 -0
- data/lib/ms/sequest/srf.rb +4 -3
- data/lib/ms/sequest/srf/search.rb +1 -1
- data/lib/ms/sequest/srf/search/tap.rb +1 -1
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/ms/sequest/bioworks_spec.rb +153 -0
- data/spec/ms/sequest/params_spec.rb +131 -0
- data/spec/ms/sequest/pepxml_spec.rb +376 -0
- data/spec/ms/sequest/sqt_spec.rb +78 -0
- data/spec/ms/sequest/sqt_spec_helper.rb +34 -0
- data/spec/ms/sequest/srf/search_spec.rb +53 -0
- data/spec/ms/sequest/srf/search_spec_helper.rb +341 -0
- data/spec/ms/sequest/srf/sqt_spec.rb +142 -0
- data/spec/ms/sequest/srf_spec.rb +182 -0
- data/spec/ms/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +51 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- data/tap.yml +0 -0
- metadata +74 -21
- data/README +0 -23
@@ -0,0 +1,1458 @@
|
|
1
|
+
|
2
|
+
require 'sample_enzyme'
|
3
|
+
require 'ms/parser/mzxml'
|
4
|
+
require 'hash_by'
|
5
|
+
require 'set_from_hash'
|
6
|
+
require 'spec_id/bioworks'
|
7
|
+
require 'instance_var_set_from_hash'
|
8
|
+
require 'ms/msrun'
|
9
|
+
require 'spec_id/srf'
|
10
|
+
require 'spec_id/sequest/params'
|
11
|
+
require 'fileutils'
|
12
|
+
|
13
|
+
class Numeric
  # Returns the number as a string that always carries an explicit sign:
  # non-negative values (including zero) get a leading '+', negative values
  # keep the '-' produced by #to_s.
  #
  # The string is built by interpolation rather than by appending to a
  # string literal, so it also works when string literals are frozen.
  def to_plus_minus_string
    self >= 0 ? "+#{self}" : to_s
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
module Sequest ; end
|
26
|
+
class Sequest::PepXML; end
|
27
|
+
|
28
|
+
# The outermost <msms_pipeline_analysis> element of a pepXML document.
# It wraps a single msms_run_summary and carries the namespace / schema
# attributes written on the root element.
class Sequest::PepXML::MSMSPipelineAnalysis
  include SpecIDXML
  # Version 1.2.3
  attr_writer :date
  # NOTE: :xsi_schemaLocation is the legacy (camel-case) writer; it is kept
  # so existing callers do not break, but nothing in this class reads it.
  attr_writer :xmlns, :xmlns_xsi, :xsi_schemaLocation
  attr_accessor :summary_xml
  # Version 2.3.4
  attr_writer :xsi_schema_location
  attr_accessor :pepxml_version
  attr_accessor :msms_run_summary

  # hash:: optional attribute name => value pairs applied with set_from_hash
  # If a block is given, msms_run_summary is set to the block's return value.
  def initialize(hash=nil)
    @xmlns = nil
    @xmlns_xsi = nil
    @xsi_schema_location = nil
    set_from_hash(hash) if hash
    @msms_run_summary = yield if block_given?
  end

  # Returns the date that was set explicitly, if any.  Otherwise (for pepXML
  # version 18) returns the current local time formatted as
  # YYYY-MM-DDTHH:MM:SS with zero-padded fields.  Returns nil for any other
  # pepXML version.
  def date
    return @date if @date
    case Sequest::PepXML.pepxml_version
    when 18 then Time.now.strftime('%Y-%m-%dT%H:%M:%S')
    end
  end

  # The xmlns attribute (a default is supplied when none was set).
  def xmlns
    @xmlns || "http://regis-web.systemsbiology.net/pepXML"
  end

  # The xmlns:xsi attribute (a default is supplied when none was set).
  def xmlns_xsi
    @xmlns_xsi || "http://www.w3.org/2001/XMLSchema-instance"
  end

  # The xsi:schemaLocation attribute (a default is supplied when none was set).
  def xsi_schema_location
    @xsi_schema_location || "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd"
  end

  # Serializes this element and its msms_run_summary to a pepXML string.
  # Aborts the process for any pepXML version other than 18.
  def to_pepxml
    case Sequest::PepXML.pepxml_version
    when 18
      element_xml_and_att_string(:msms_pipeline_analysis, "date=\"#{date}\" xmlns=\"#{xmlns}\" xmlns:xsi=\"#{xmlns_xsi}\" xsi:schemaLocation=\"#{xsi_schema_location}\" summary_xml=\"#{summary_xml}\"") do
        @msms_run_summary.to_pepxml
      end
    else
      abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
    end
  end

end
|
92
|
+
|
93
|
+
# The <msms_run_summary> element: instrument metadata for one run plus the
# sample enzyme, the search summary and the list of spectrum queries.
class Sequest::PepXML::MSMSRunSummary
  include SpecID
  include SpecIDXML

  # The name of the pep xml file (without extension) (but this is a long
  # filename!!!)
  attr_accessor :base_name
  # The name of the mass spec manufacturer
  attr_accessor :ms_manufacturer
  attr_accessor :ms_model
  attr_accessor :ms_mass_analyzer
  attr_accessor :ms_detector
  attr_accessor :raw_data_type
  attr_accessor :raw_data
  attr_accessor :ms_ionization
  # the version of TPP you are using (determines xml output)
  attr_accessor :pepxml_version

  # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
  attr_accessor :sample_enzyme
  # A SearchSummary object
  attr_accessor :search_summary
  # An array of spectrum_queries
  attr_accessor :spectrum_queries

  # hash:: optional name => value pairs applied with
  #        instance_var_set_from_hash
  # If a block is given, spectrum_queries (should be an array of spectrum
  # queries) is set to the return value of the block.
  def initialize(hash=nil)
    @spectrum_queries = []
    instance_var_set_from_hash(hash) if hash
    @spectrum_queries = yield if block_given?
  end

  # Serializes the run summary, its sample enzyme, search summary and all
  # spectrum queries.  Returns nil for any pepXML version other than 18.
  def to_pepxml
    case Sequest::PepXML.pepxml_version
    when 18
      element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
        sample_enzyme.to_pepxml +
          search_summary.to_pepxml +
          spectrum_queries.map {|sq| sq.to_pepxml }.join
      end
    end
  end

  # The class used for search hits belonging to this run summary.
  def search_hit_class
    Sequest::PepXML::SearchHit
  end

  # Builds a new run summary from a node (see #from_pepxml_node).
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills the instrument attributes from +node+, which is read with
  # node['attribute_name'].  Returns self.
  def from_pepxml_node(node)
    @base_name = node['base_name']
    @ms_manufacturer = node['msManufacturer']
    @ms_model = node['msModel']
    # msIonization belongs in @ms_ionization; storing it in @ms_manufacturer
    # would overwrite the manufacturer read above.
    @ms_ionization = node['msIonization']
    @ms_mass_analyzer = node['msMassAnalyzer']
    @ms_detector = node['msDetector']
    @raw_data_type = node['raw_data_type']
    @raw_data = node['raw_data']
    self
  end
end
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
class Sequest::PepXML
|
165
|
+
include SpecIDXML
|
166
|
+
|
167
|
+
## CREATE a default version for the entire class
|
168
|
+
class << self
|
169
|
+
attr_accessor :pepxml_version
|
170
|
+
end
|
171
|
+
DEF_VERSION = 18
|
172
|
+
self.pepxml_version = DEF_VERSION # default version
|
173
|
+
|
174
|
+
attr_accessor :pepxml_version, :msms_pipeline_analysis
|
175
|
+
## the full path name (no extension)
|
176
|
+
attr_accessor :base_name
|
177
|
+
attr_accessor :h_plus
|
178
|
+
attr_accessor :avg_parent
|
179
|
+
|
180
|
+
#attr_accessor :spectrum_queries, :params, :base_name, :search_engine, :database, :raw_data_type, :raw_data, :out_data_type, :out_data, :sample_enzyme, :pepxml_version
|
181
|
+
|
182
|
+
# returns an array of spectrum queries
|
183
|
+
def spectrum_queries
  # Delegates to the run summary nested inside the pipeline analysis.
  run_summary = msms_pipeline_analysis.msms_run_summary
  run_summary.spectrum_queries
end
|
186
|
+
|
187
|
+
# msms_pipeline_analysis is set to the result of the yielded block
|
188
|
+
# and set_mono_or_avg is called with params if given
|
189
|
+
# pepxml_version::     stored on the class (shared by every PepXML object)
# sequest_params_obj:: optional sequest params object; when given it is kept
#                      in @params (read by precursor_mass_type and
#                      fragment_mass_type) and passed to set_mono_or_avg
# If a block is given, msms_pipeline_analysis is set to its return value and
# base_name is copied from that pipeline's msms_run_summary.
def initialize(pepxml_version=DEF_VERSION, sequest_params_obj=nil)
  self.class.pepxml_version = pepxml_version
  if sequest_params_obj
    @params = sequest_params_obj
    set_mono_or_avg(sequest_params_obj)
  end
  if block_given?
    @msms_pipeline_analysis = yield
    @base_name = @msms_pipeline_analysis.msms_run_summary.base_name
  end
end
|
199
|
+
|
200
|
+
# sets @h_plus and @avg_parent from the sequest params object
|
201
|
+
def set_mono_or_avg(sequest_params_obj)
  # Anything other than "monoisotopic" is treated as average mass.
  mass_type = sequest_params_obj.precursor_mass_type
  @avg_parent = ("monoisotopic" != mass_type)

  mass_table = @avg_parent ? SpecID::AVG : SpecID::MONO
  @h_plus = mass_table[:h_plus]
end
|
212
|
+
|
213
|
+
def date
  # Current time in Time#to_s format.
  current_time = Time.new
  current_time.to_s
end
|
216
|
+
|
217
|
+
def xml_version
  # XML declaration line, terminated by a newline.
  declaration = '<?xml version="1.0" encoding="UTF-8"?>'
  "#{declaration}\n"
end
|
220
|
+
|
221
|
+
# for pepxml_version == 0
|
222
|
+
def doctype
  # DOCTYPE line, terminated by a newline.
  declaration = '<!DOCTYPE msms_pipeline_analysis SYSTEM "/usr/bin/msms_analysis3.dtd">'
  "#{declaration}\n"
end
|
225
|
+
|
226
|
+
def style_sheet
  # Only pepXML version 18 has a stylesheet instruction; otherwise nil.
  return nil unless self.class.pepxml_version == 18
  '<?xml-stylesheet type="text/xsl" href="/tools/bin/TPP/tpp/schema/pepXML_std.xsl"?>'
end
|
232
|
+
|
233
|
+
def header
  # XML declaration followed by the stylesheet instruction (version 18 only;
  # nil for any other version).
  xml_version + style_sheet if self.class.pepxml_version == 18
end
|
238
|
+
|
239
|
+
# updates the private attrs _num_prots and _first_prot on bioworks pep
|
240
|
+
# objects. Ideally, we'd like these attributes to reside elsewhere, but for
|
241
|
+
# memory concerns, this is best for now.
|
242
|
+
def self._prot_num_and_first_prot_by_pep(pep_array)
  # Group peptides by amino-acid sequence; every peptide in a group receives
  # the size of the group's unique protein list and that list's first entry.
  pep_array.hash_by(:aaseq).each do |_aaseq, same_seq_peps|
    proteins = same_seq_peps.inject([]) { |acc, pep| acc.push( *(pep.prots) ) }.uniq
    protein_count = proteins.size
    first_protein = proteins.first
    same_seq_peps.each do |pep|
      pep._num_prots = protein_count
      pep._first_prot = first_protein
    end
  end
end
|
254
|
+
|
255
|
+
|
256
|
+
# Defaults merged underneath the caller's options by new_from_srf,
# set_from_bioworks and set_from_bioworks_xml.
Default_Options = {
  :out_path => '.',
  #:backup_db_path => '.',

  ## PepXML options:
  :pepxml_version => DEF_VERSION,

  ## MSMSRunSummary options:
  # (a :sample_enzyme option may also be passed: a string that must be
  # recognized in sample_enzyme.rb, or your own SampleEnzyme object)
  :ms_manufacturer => 'ThermoFinnigan',
  :ms_model => 'LCQ Deca XP Plus',
  :ms_ionization => 'ESI',
  :ms_mass_analyzer => 'Ion Trap',
  :ms_detector => 'UNKNOWN',
  :ms_data => '.',            # path to ms data files (raw or mzxml)
  :raw_data_type => "raw",
  :raw_data => ".mzXML",      ## even if you don't have it?

  ## SearchSummary options:
  :out_data_type => "out",    ## may be srf?? don't think pepxml recognizes this yet
  :out_data => ".tgz",        ## may be srf??

  ## Output options:
  :copy_mzxml => false,       # copy the mzxml file to the out_path (create it if necessary)
  :print => false,            # print the objects to file
}
|
278
|
+
|
279
|
+
# will dynamically set :ms_model and :ms_mass_analyzer from srf info
|
280
|
+
# (ignoring defaults or anything passed in) for LTQ Orbitrap
|
281
|
+
# and LCQ Deca XP
|
282
|
+
# See the Sequest::PepXML::Default_Options hash for defaults
|
283
|
+
# unless given, the out_path will be given as the path of the srf_file
|
284
|
+
# srf may be an object or a filename
|
285
|
+
# Builds a PepXML object from an SRF object (or the name of an srf file).
#
# srf::  an SRF object, or a String filename handed to SRF.new
# opts:: merged over Default_Options.  Options read here beyond the
#        defaults: :backup_db_path, :sample_enzyme, :deltacn_orig and
#        :all_hits.  :ms_model is always overwritten from the srf header and
#        :ms_mass_analyzer is overwritten for Orbitrap / LCQ Deca XP models.
#
# Returns the PepXML object.  When :copy_mzxml is set and no mzXML file can
# be found, the process is aborted with a message on stderr.
def self.new_from_srf(srf, opts={})
  opts = Default_Options.merge(opts)

  ## read the srf file
  srf = SRF.new(srf) if srf.is_a? String

  ## set the outpath
  out_path = opts.delete(:out_path)

  params = srf.params

  ## check to see if we need backup_db
  backup_db_path = opts.delete(:backup_db_path)
  if !File.exist?(params.database) && backup_db_path
    params.database_path = backup_db_path
  end

  #######################################################################
  # PREPARE THE OPTIONS:
  #######################################################################
  ## remove items from the options hash that don't belong to the
  ## MSMSRunSummary
  ppxml_version = opts.delete(:pepxml_version)
  out_data_type = opts.delete(:out_data_type)
  out_data = opts.delete(:out_data)

  ## Extract meta info from srf
  bn_noext = base_name_noext(srf.header.raw_filename)
  opts[:ms_model] = srf.header.model
  case opts[:ms_model]
  when /Orbitrap/
    opts[:ms_mass_analyzer] = 'Orbitrap'
  when /LCQ Deca XP/
    opts[:ms_mass_analyzer] = 'Ion Trap'
  end

  ## Create the base name
  full_base_name_no_ext = make_base_name( File.expand_path(out_path), bn_noext)
  opts[:base_name] = full_base_name_no_ext

  ## Create the search summary:
  search_summary_options = {
    :search_database => Sequest::PepXML::SearchDatabase.new(params),
    :base_name => full_base_name_no_ext,
    :out_data_type => out_data_type,
    :out_data => out_data
  }
  modifications_string = srf.header.modifications
  search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)

  # the sample enzyme comes from the options if given, else from the params:
  sample_enzyme_obj = opts[:sample_enzyme] || params.sample_enzyme
  opts[:sample_enzyme] = sample_enzyme_obj

  ## Create the pepxml obj and top level objects
  pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
  pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
  pepxml_obj.msms_pipeline_analysis = pipeline
  pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
  pipeline.msms_run_summary.search_summary = search_summary
  modifications_obj = search_summary.modifications

  ## name some common variables we'll need
  h_plus = pepxml_obj.h_plus

  ## COPY MZXML FILES IF NECESSARY
  if opts[:copy_mzxml]
    mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
    to_copy = MS::Converter::MzXML.file_to_mzxml(mzxml_pathname_noext)
    if to_copy
      FileUtils.cp to_copy, out_path
    else
      # This is a failure: report on stderr and exit non-zero (abort), as
      # the other fatal paths in this file do.
      abort [
        "Couldn't find mzXML file with base: #{mzxml_pathname_noext}",
        "Perhaps you need to specify the location of the raw data",
        "or need an mzXML converter (readw or t2x)"
      ].join("\n")
    end
  end

  #######################################################################
  # CREATE the spectrum_queries_arr
  #######################################################################
  srf_index = srf.index
  out_files = srf.out_files
  spectrum_queries_arr = []
  files_with_hits_index = 0  ## 1-indexed count of dta files that had hits

  # position of the deltacn value to report inside a hit (see the index
  # legend inside the hit loop)
  deltacn_orig = opts[:deltacn_orig]
  deltacn_index = deltacn_orig ? 20 : 19

  srf.dta_files.each_with_index do |dta_file,dta_i|
    next if out_files[dta_i].num_hits == 0
    files_with_hits_index += 1

    precursor_neutral_mass = dta_file.mh - h_plus

    (start_scan, end_scan, charge) = srf_index[dta_i]
    sq_hash = {
      :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
      :start_scan => start_scan,
      :end_scan => end_scan,
      :precursor_neutral_mass => precursor_neutral_mass,
      :assumed_charge => charge.to_i,
      :pepxml_version => ppxml_version,
      :index => files_with_hits_index,
    }

    spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)

    hits = out_files[dta_i].hits

    # all hits when :all_hits is set, otherwise the top hit only
    num_search_hits = opts[:all_hits] ? out_files[dta_i].num_hits : 1

    search_hits = Array.new(num_search_hits) do |hit_i|
      hit = hits[hit_i]
      # under the modified deltacn schema (like bioworks)
      # Get proper deltacn and deltacnstar
      # under new srf, deltacn is already corrected for what prophet wants,
      # deltacn_orig_updated is how to access the old one
      # Prophet deltacn is not the same as the native Sequest deltacn
      # It is the deltacn of the second best hit!

      ## mass calculations:
      calc_neutral_pep_mass = hit[0] - h_plus

      sequence = hit.sequence

      # NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
      ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
      # 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated

      sh_hash = {
        :hit_rank => hit_i+1,
        :peptide => pepseq,
        :peptide_prev_aa => prevaa,
        :peptide_next_aa => nextaa,
        :protein => hit[10].first.reference.split(" ").first,
        :num_tot_proteins => hit[10].size,
        :num_matched_ions => hit[7],
        :tot_num_ions => hit[8],
        :calc_neutral_pep_mass => calc_neutral_pep_mass,
        :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
        :num_tol_term => sample_enzyme_obj.num_tol_term(sequence),
        :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
        :is_rejected => 0,
        # These are search score attributes:
        :xcorr => hit[3],
        :deltacn => hit[deltacn_index],
        :spscore => hit[2],
        :sprank => hit[6],
        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
      }
      unless deltacn_orig
        # no next hit? then its deltacnstar == 1
        sh_hash[:deltacnstar] = hits[hit_i+1].nil? ? '1' : '0'
      end
      Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
    end

    search_result = Sequest::PepXML::SearchResult.new
    search_result.search_hits = search_hits
    spectrum_query.search_results = [search_result]
    spectrum_queries_arr << spectrum_query
  end

  pepxml_obj.base_name = pipeline.msms_run_summary.base_name
  pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr

  pepxml_obj
end
|
481
|
+
|
482
|
+
# takes an .srg or bioworks.xml file
|
483
|
+
# if possible, ensures that an mzXML file is present for each pepxml file
|
484
|
+
# :print => true, will print files
|
485
|
+
# NOTES: num_tol_term and num_missing_cleavages are both calculated from the
|
486
|
+
# sample_enzyme. Thus, a No_Enzyme search may still pass in a
|
487
|
+
# :sample_enzyme option to get these calculated.
|
488
|
+
def self.set_from_bioworks(bioworks_file, opts={})
  opts = Default_Options.merge(opts)

  ## Create the out_path directory if necessary
  out_dir = opts[:out_path]
  FileUtils.mkpath(out_dir) unless File.exist?(out_dir)
  abort "#{out_dir} must be a directory!" unless File.directory?(out_dir)

  # Dispatch on what kind of object the file parses into.
  spec_id = SpecID.new(bioworks_file)
  pepxml_objs =
    case spec_id
    when Bioworks
      abort("must have opts[:params] set!") unless opts[:params]
      set_from_bioworks_xml(bioworks_file, opts[:params], opts)
    when SRFGroup
      spec_id.srfs.map { |srf| new_from_srf(srf, opts) }
    else
      abort "invalid object"
    end

  # :print => true writes each object to <base_name>.xml
  if opts[:print]
    pepxml_objs.each { |obj| obj.to_pepxml(obj.base_name + ".xml") }
  end
  pepxml_objs
end
|
519
|
+
|
520
|
+
|
521
|
+
# Takes bioworks 3.2/3.3 xml output (with no filters)
|
522
|
+
# Returns a list of PepXML objects
|
523
|
+
# params = sequest.params file
|
524
|
+
# bioworks = bioworks.xml exported multi-consensus view file
|
525
|
+
# pepxml_version = 0 for tpp 1.2.3
|
526
|
+
# pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
|
527
|
+
# Converts a bioworks multi-consensus export into PepXML objects, one per
# peptide base_name (i.e. one per original run file).
#
# bioworks:: a Bioworks object, or a String filename handed to SpecID.new
# params::   a Sequest::Params object, or a String filename handed to
#            Sequest::Params.new
# opts::     merged over Default_Options; :backup_db_path, :sample_enzyme
#            and :copy_mzxml are also read
#
# Returns an array of PepXML objects sorted by their summary_xml.
# Aborts on an unsupported pepxml_version, on unrecognized argument types
# and on a bioworks version that matches neither 3.2 nor 3.3.
def self.set_from_bioworks_xml(bioworks, params, opts={})
  opts = Default_Options.merge(opts)
  pepxml_version, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)

  out_path = '.' unless out_path

  supported_versions = [18]

  unless supported_versions.include?(opts[:pepxml_version])
    abort "pepxml_version: #{pepxml_version} not currently supported.  Current support is for versions #{supported_versions.join(', ')}"
  end

  ## Turn params and bioworks_obj into objects if necessary:
  # Params:
  if params.class == Sequest::Params  # OK!
  elsif params.class == String ; params = Sequest::Params.new(params)
  else ; abort "Don't recognize #{params} as object or string!"
  end
  # Bioworks:
  if bioworks.class == Bioworks  # OK!
  elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
  else ; abort "Don't recognize #{bioworks} as object or string!"
  end

  sample_enzyme_obj = opts[:sample_enzyme] || params.sample_enzyme

  ## check to see if we need backup_db
  backup_db_path = opts.delete(:backup_db_path)
  if !File.exist?(params.database) && backup_db_path
    params.database_path = backup_db_path
  end

  modifications_string = bioworks.modifications

  # Hash by the filenames to split into filenames:
  pepxml_objects = bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|

    search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
    modifications_obj = search_summary.modifications

    pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
    full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)

    # (pepxml_version was validated above, so only the version 18 layout
    # is built here)
    pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
    msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
      :base_name => full_base_name_no_ext,
      :ms_manufacturer => ms_manufacturer,
      :ms_model => ms_model,
      :ms_ionization => ms_ionization,
      :ms_mass_analyzer => ms_mass_analyzer,
      :ms_detector => ms_detector,
      :raw_data_type => raw_data_type,
      :raw_data => raw_data,
      :sample_enzyme => sample_enzyme_obj, # usually, params.sample_enzyme,
      :search_summary => search_summary,
    })
    pipeline.msms_run_summary = msms_run_summary
    pepxml_obj.msms_pipeline_analysis = pipeline
    pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name = full_base_name_no_ext
    pepxml_obj.base_name = full_base_name_no_ext

    # Create a hash by pep object containing num_tot_proteins
    # This is only valid if all hits are present (no previous thresholding)
    # Since out2summary only acts on one folder at a time,
    # we should only do it for one folder at a time!  (that's why we do this
    # here instead of globally)
    self._prot_num_and_first_prot_by_pep(pep_arr)
    prec_mz_arr = nil
    # The dots are escaped so that only a literal "3.2" / "3.3" matches.
    case x = bioworks.version
    when /3\.2/
      calc_prec_by = :prec_mz_arr
      # get the precursor_mz array for this filename
      mzxml_file = MS::Converter::MzXML.file_to_mzxml(File.join(ms_data, base_name))
      prec_mz_arr = MS::MSRun.precursor_mz_by_scan_num(mzxml_file)
    when /3\.3/
      calc_prec_by = :deltamass
    else
      abort "invalid BioworksBrowser version: #{x}"
    end

    if opts[:copy_mzxml]
      to_copy = MS::Converter::MzXML.file_to_mzxml(File.join(ms_data, base_name))
      if to_copy
        FileUtils.cp to_copy, out_path
      end
    end

    spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).map do |_key, arr|

      # Sort_by_rank and take the top hit (to mimick out2summary):
      arr = arr.sort_by {|pep| pep.xcorr.to_f } # ascending
      top_pep = arr.pop
      second_hit = arr.last # needed for deltacnstar

      case calc_prec_by
      when :prec_mz_arr
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge, pepxml_obj.avg_parent)
      when :deltamass
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
      end

      calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)

      # deltacn & star:
      # (NOTE: OLD?? out2summary wants the deltacn of the 2nd best hit.)
      if second_hit
        #top_pep.deltacn = second_hit.deltacn
        deltacnstar = '0'
      else
        top_pep.deltacn = '1.0'
        deltacnstar = '1'
      end
      # Create the nested structure of queries{results{hits}}
      spec_query = Sequest::PepXML::SpectrumQuery.new({
        :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
        :start_scan => top_pep.first_scan,
        :end_scan => top_pep.last_scan,
        :precursor_neutral_mass => precursor_neutral_mass,
        :assumed_charge => top_pep.charge,
        :pepxml_version => pepxml_version,
      })

      search_result = Sequest::PepXML::SearchResult.new
      ## Calculate some interdependent values;
      # NOTE: the bioworks mass is really M+H if two or more scans went
      # into the search_hit; calc_neutral_pep_mass is simply the avg of
      # precursor masses adjusted to be neutral
      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
      (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
      search_hit = Sequest::PepXML::SearchHit.new({
        :hit_rank => 1,
        :peptide => pepseq,
        :peptide_prev_aa => prevaa,
        :peptide_next_aa => nextaa,
        :protein => top_pep._first_prot.reference.split(" ").first,
        :num_tot_proteins => top_pep._num_prots,
        :num_matched_ions => num_matched_ions,
        :tot_num_ions => tot_num_ions,
        :calc_neutral_pep_mass => calc_neutral_pep_mass,
        :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
        :num_tol_term => sample_enzyme_obj.num_tol_term(top_pep.sequence),
        :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
        :is_rejected => 0,
        # These are search score attributes:
        :xcorr => top_pep.xcorr,
        :deltacn => top_pep.deltacn,
        :deltacnstar => deltacnstar,
        :spscore => top_pep.sp,
        :sprank => top_pep.rsp,
        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
        :spectrum_query => spec_query,
      })
      search_result.search_hits = [search_hit] # there can be multiple search hits
      spec_query.search_results = [search_result] # can be multiple search_results
      spec_query
    end

    # create an index by spectrum as results end up typically in out2summary
    # (I really dislike this order, however)
    spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
    spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
    pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
    pepxml_obj
  end ## collects pepxml_objs
  # PepXML#summary_xml is the object's base_name plus ".xml"
  pepxml_objects.sort_by {|obj| obj.summary_xml }
end
|
730
|
+
|
731
|
+
def summary_xml
  # base_name with an ".xml" extension appended
  stem = base_name
  stem + ".xml"
end
|
734
|
+
|
735
|
+
def precursor_mass_type
  # Forwarded to the object held in @params.
  prms = @params
  prms.precursor_mass_type
end
|
738
|
+
|
739
|
+
def fragment_mass_type
  # Forwarded to the object held in @params.
  prms = @params
  prms.fragment_mass_type
end
|
742
|
+
|
743
|
+
# combines filename in a manner consistent with the path
|
744
|
+
def self.make_base_name(path, filename)
  # Use a backslash only when the path splits into more pieces on '\' than
  # it does on '/'.
  backslash = "\\"
  sep = path.split('/').size < path.split(backslash).size ? backslash : '/'

  leaf = File.basename(filename)
  # Do not double the separator when the path already ends with it.
  path.end_with?(sep) ? path + leaf : path + sep + leaf
end
|
755
|
+
|
756
|
+
# outputs pepxml, (to file if given)
|
757
|
+
def to_pepxml(file=nil)
  # The string returned by header is extended in place with the pipeline
  # analysis xml.
  xml = header
  xml << @msms_pipeline_analysis.to_pepxml

  # Writing to disk is optional; the xml string is returned either way.
  File.open(file, "w") { |out| out.print xml } if file
  xml
end
|
766
|
+
|
767
|
+
# given any kind of filename (from windows or whatever)
|
768
|
+
# returns the base of the filename with no file extension
|
769
|
+
def self.base_name_noext(file)
  # Work on a converted copy: the caller's string is left untouched, so
  # frozen strings and strings reused elsewhere are safe to pass in.
  unix_style = file.gsub("\\", '/')
  # Strip the trailing run of '.'-introduced word/dot/caret characters.
  File.basename(unix_style).sub(/\.[\w^\.]+$/, '')
end
|
773
|
+
|
774
|
+
|
775
|
+
end # PepXML
|
776
|
+
|
777
|
+
|
778
|
+
# Holds the search hits that belong to one search result and renders them
# as pepxml.
class Sequest::PepXML::SearchResult
  include SpecIDXML

  # an array of search_hits
  attr_accessor :search_hits

  # search_hits:: the hits to hold (a new empty array when omitted)
  def initialize(search_hits = [])
    @search_hits = search_hits
  end

  # Passes the joined pepxml of every search hit to element_xml_no_atts as
  # the content for :search_result.
  def to_pepxml
    element_xml_no_atts(:search_result) do
      hits_xml = @search_hits.map {|hit| hit.to_pepxml }
      hits_xml.join
    end
  end

end
|
795
|
+
|
796
|
+
# Data behind a pepxml search_summary element. Any message this class does
# not define itself is forwarded to the params object (see method_missing).
class Sequest::PepXML::SearchSummary
  include SpecIDXML

  attr_accessor :params
  attr_accessor :base_name
  attr_accessor :out_data_type
  attr_accessor :out_data
  # by default, "1"
  attr_accessor :search_id
  attr_accessor :modifications
  # A SearchDatabase object (responds to :local_path and :type)
  attr_accessor :search_database

  # prms:: a sequest params object; when given it is stored and a
  #        Modifications object is built from it and modifications_string
  # modifications_string:: see Modifications
  # args:: a hash of parameters handed to set_from_hash
  def initialize(prms=nil, modifications_string='', args=nil)
    @search_id = "1"
    if prms
      @params = prms
      @modifications = Sequest::PepXML::Modifications.new(prms, modifications_string)
    end
    set_from_hash(args) if args
  end

  # Forwards unknown messages to the params object; returns nil when no
  # params object has been set.
  def method_missing(symbol, *args)
    @params.send(symbol, *args) if @params
  end

  # Renders the search_summary element: the search database, the enzymatic
  # search constraint (left out when the enzyme name starts with
  # "No_Enzyme"), the modifications and the parameters, in that order.
  def to_pepxml
    summary_atts = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    element_xml(:search_summary, summary_atts) do
      database_xml = search_database.to_pepxml
      constraint_xml =
        if @params.enzyme =~ /^No_Enzyme/
          ''
        else
          short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini])
        end
      database_xml + constraint_xml + @modifications.to_pepxml + Sequest::PepXML::Parameters.new(@params).to_pepxml
    end
  end

  # Builds a new object and delegates to the instance method of the same name.
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Not implemented: always raises NotImplementedError.
  def from_pepxml_node(node)
    raise NotImplementedError, "right now we just have the xml node at your disposal"
  end

end
|
845
|
+
|
846
|
+
# Renders the options of a params object as pepxml parameter elements.
class Sequest::PepXML::Parameters
  include SpecIDXML

  attr_accessor :params

  # obj:: the params object whose opts will be rendered
  def initialize(obj=nil)
    @params = obj
  end

  # (used to be called pepxml_parameters)
  # Sorts the params opts, converts each key to a string and hands the
  # resulting names to params_xml together with the params object.
  # Per the original author's note, the xml has the form
  # <parameter name="#{method_name}" value="#{method_value}"/>.
  def to_pepxml
    sorted_opts = @params.opts.sort
    names = sorted_opts.map {|key, _value| key.to_s }
    params_xml(@params, *names)
    # (:peptide_mass_tol, :peptide_mass_units, :fragment_ion_tol, :ion_series, :max_num_differential_AA_per_mod, :nucleotide_reading_frame, :num_output_lines, :remove_precursor_peak, :ion_cutoff_percentage, :match_peak_count, :match_peak_allowed_error, :match_peak_tolerance, :protein_mass_filter, :sequence_header_filter)
  end
end
|
863
|
+
|
864
|
+
# Collects the static and variable (differential) modifications described
# by a sequest params object and a modification symbols string, and renders
# them as pepxml.
class Sequest::PepXML::Modifications
  include SpecIDXML

  # sequest params object
  attr_accessor :params
  # array holding AAModifications
  attr_accessor :aa_mods
  # array holding TerminalModifications
  attr_accessor :term_mods
  # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
  # (or [:nt, difference] / [:ct, difference] for the termini)
  # values are the special_symbols
  attr_accessor :mod_symbols_hash

  # a hash of all differential modifications present by aa_one_letter_symbol
  # and special_symbol. This is NOT the mass difference but the total mass {
  # 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
  # the amino acid sequence, they are given the *differential* mass. The
  # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
  # cterminus ']' => 14.55 # nterminus
  #
  # Backed by @masses_by_diff_mod, the hash that set_hashes fills and
  # modification_info reads (the accessor used to point at an instance
  # variable nothing else touched).
  def masses_by_diff_mod_hash
    @masses_by_diff_mod
  end

  # Replaces the hash described at masses_by_diff_mod_hash.
  def masses_by_diff_mod_hash=(hash)
    @masses_by_diff_mod = hash
  end

  # The modification symbols string looks like this:
  # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
  # ct is cterminal peptide (differential)
  # nt is nterminal peptide (differential)
  # the C is just cysteine
  # When params is given, calls set_modifications (which also fills the
  # symbol and mass hashes).
  def initialize(params=nil, modification_symbols_string='')
    @params = params
    if @params
      set_modifications(params, modification_symbols_string)
    end
  end

  # Resets and fills @masses_by_diff_mod and @mod_symbols_hash from the
  # modification symbols string. Both hashes are left empty (and nil is
  # returned) when the string is nil or empty.
  def set_hashes(modification_symbols_string)
    @mod_symbols_hash = {}
    @masses_by_diff_mod = {}
    return nil if modification_symbols_string.nil? || modification_symbols_string == ''

    table = @params.mass_table
    modification_symbols_string.split(/\)\s+\(/).each do |mod|
      next unless mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
      residues = $1
      symbol = $2.dup
      mass_diff = $3.to_f
      if residues == 'ct' || residues == 'nt'
        # termini: keyed by the bare symbol, value is the differential mass
        @masses_by_diff_mod[symbol] = mass_diff
        @mod_symbols_hash[[residues.to_sym, mass_diff]] = symbol
      else
        # amino acids: one entry per residue letter, value is the total mass
        residues.split('').each do |aa|
          aa_as_sym = aa.to_sym
          @masses_by_diff_mod[aa + symbol] = mass_diff + table[aa_as_sym]
          @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol
        end
      end
    end
  end

  # given a bare peptide (no end pieces) returns a ModificationInfo object
  # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
  # if there are no modifications, returns nil
  # The peptide argument itself is never modified.
  def modification_info(peptide)
    # nothing to report when the hashes were never set or hold no mods
    return nil if @masses_by_diff_mod.nil? || @masses_by_diff_mod.size == 0

    hash = {}
    hash[:modified_peptide] = peptide.dup
    hsh = @masses_by_diff_mod
    table = @params.mass_table
    h = table[:h] # this? or h_plus ??
    oh = table[:o] + h
    ## only the termini can match a single char
    if hsh.key? peptide[0,1]
      # AA + H + differential_mod
      hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
      peptide = peptide[1...(peptide.size)]
    end
    if hsh.key? peptide[(peptide.size-1),1]
      # AA + OH + differential_mod
      hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
      # drop only the trailing terminus symbol (on a new string), so the
      # residues remain available for the scan below
      peptide = peptide[0...(peptide.size-1)]
    end
    # NOTE(review): i+1 counts characters of the symbol-bearing string, so
    # a mod that follows another mod symbol is reported one position later
    # per preceding symbol -- confirm this is the intended position.
    mod_array = []
    (0...peptide.size).each do |i|
      if hsh.key? peptide[i,2]
        mod_array << Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
      end
    end
    if mod_array.size > 0
      hash[:mod_aminoacid_masses] = mod_array
    end
    if hash.size > 1  # if there is more than just the modified peptide there
      Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
    else
      nil
    end
  end

  # returns an array of static mod objects and static terminal mod objects
  # ([static_mods, static_terminal_mods]); entries of params.mods whose
  # value converts to 0.0 are ignored
  def create_static_mods(params)

    ####################################
    ## static mods
    ####################################

    static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
    static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]

    params.mods.each do |k,v|
      v_to_f = v.to_f
      next if v_to_f == 0.0
      if k =~ /add_(\w)_/
        static_mods << [$1.to_sym, v_to_f]
      else
        static_terminal_mods << [k, v_to_f]
      end
    end
    aa_hash = params.mass_table

    ## Create the static_mods objects
    static_mods.map! do |mod|
      hash = {
        :aminoacid => mod[0].to_s,
        :massdiff => mod[1],
        :mass => aa_hash[mod[0]] + mod[1],
        :variable => 'N',
        :binary => 'Y',
      }
      Sequest::PepXML::AAModification.new(hash)
    end

    ## Create the static_terminal_mods objects
    static_terminal_mods.map! do |mod|
      terminus = if mod[0] =~ /Cterm/ ; 'c'
                 else ; 'n' # only two possible termini
                 end
      protein_terminus = case mod[0]
                         when /Nterm_protein/ ; 'n'
                         when /Cterm_protein/ ; 'c'
                         else nil
                         end

      # create the hash
      hash = {
        :terminus => terminus,
        :massdiff => mod[1],
        :variable => 'N',
        :description => mod[0],
      }
      hash[:protein_terminus] = protein_terminus if protein_terminus
      Sequest::PepXML::TerminalModification.new(hash)
    end
    [static_mods, static_terminal_mods]
  end

  # 1. sets aa_mods and term_mods from a sequest params object
  # 2. sets @params
  # 3. sets @masses_by_diff_mod (and @mod_symbols_hash)
  def set_modifications(params, modification_symbols_string)
    @params = params

    set_hashes(modification_symbols_string)
    (static_mods, static_terminal_mods) = create_static_mods(params)

    aa_hash = params.mass_table
    #################################
    # Variable Mods:
    #################################
    # diff_search_options is read as alternating "<diff> <residues>" pairs
    arr = params.diff_search_options.rstrip.split(/\s+/)
    # [aa.to_sym, diff.to_f]
    variable_mods = []
    (0...arr.size).step(2) do |i|
      if arr[i].to_f != 0.0
        variable_mods << [arr[i+1], arr[i].to_f]
      end
    end
    mod_objects = []
    variable_mods.each do |mod|
      mod[0].split('').each do |aa|
        hash = {
          :aminoacid => aa,
          :massdiff => mod[1],
          :mass => aa_hash[aa.to_sym] + mod[1],
          :variable => 'Y',
          :binary => 'N',
          :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
        }
        mod_objects << Sequest::PepXML::AAModification.new(hash)
      end
    end
    variable_mods = mod_objects
    #################################
    # TERMINAL Variable Mods:
    #################################
    # These are always peptide, not protein termini (for sequest)
    (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }

    # [terminus, numeric mass difference, symbol]. The symbol is looked up
    # with the same [:nt, diff] / [:ct, diff] array key that set_hashes
    # stores. The mass difference stays numeric (as for the static terminal
    # mods) because TerminalModification#to_pepxml formats it itself.
    # A missing (nil) diff is treated as "no modification".
    to_add = []
    if nterm_diff && nterm_diff != 0.0
      to_add << ['n', nterm_diff, @mod_symbols_hash[[:nt, nterm_diff]]]
    end
    if cterm_diff && cterm_diff != 0.0
      to_add << ['c', cterm_diff, @mod_symbols_hash[[:ct, cterm_diff]]]
    end

    variable_terminal_mods = to_add.map do |term, mssdiff, symb|
      hash = {
        :terminus => term,
        :massdiff => mssdiff,
        :variable => 'Y',
        :symbol => symb,
      }
      Sequest::PepXML::TerminalModification.new(hash)
    end

    #########################
    # COLLECT THEM
    #########################
    @aa_mods = static_mods + variable_mods
    @term_mods = static_terminal_mods + variable_terminal_mods
  end

  ## Generates the pepxml for static and differential amino acid mods based on
  ## sequest object: the pepxml of every aa_mod followed by that of every
  ## term_mod (an empty string when neither is set)
  def to_pepxml
    st = ''
    if @aa_mods
      st << @aa_mods.map {|v| v.to_pepxml }.join
    end
    if @term_mods
      st << @term_mods.map {|v| v.to_pepxml }.join
    end
    st
  end

end
|
1109
|
+
|
1110
|
+
# Modified aminoacid, static or variable
|
1111
|
+
# unless otherwise stated, all attributes can be anything
|
1112
|
+
class Sequest::PepXML::AAModification
  include SpecIDXML

  # One letter code of the amino acid
  attr_accessor :aminoacid
  # Mass difference with respect to the unmodified aminoacid. In the xml it
  # must begin with either + (nonnegative) or - [e.g. +1.05446 or -2.3342];
  # to_pepxml calls to_plus_minus_string on this value to produce that form
  # (the original author's note asks for a string here and points to
  # Numeric#to_plus_minus_string at the top of the file).
  attr_accessor :massdiff
  # Mass of the modified aminoacid
  attr_accessor :mass
  # Y if both modified and unmodified aminoacid could be present in the
  # dataset, N if only modified aminoacid can be present
  attr_accessor :variable
  # whether modification can reside only at protein terminus (specified 'n',
  # 'c', or 'nc')
  attr_accessor :peptide_terminus
  # Special symbol used by search engine to designate this modification
  attr_accessor :symbol
  # Y if each peptide must have only modified or unmodified aminoacid, N if a
  # peptide may contain both modified and unmodified aminoacid
  attr_accessor :binary

  # hash:: attribute values, applied with instance_var_set_from_hash
  #        (can use unless there are weird methods)
  def initialize(hash=nil)
    return unless hash
    instance_var_set_from_hash(hash)
  end

  # Renders a short aminoacid_modification element carrying every
  # attribute; note that massdiff goes through to_plus_minus_string.
  def to_pepxml
    atts = "aminoacid=\"#{aminoacid}\" massdiff=\"#{massdiff.to_plus_minus_string}\"" \
           " mass=\"#{mass}\" variable=\"#{variable}\"" \
           " peptide_terminus=\"#{peptide_terminus}\" symbol=\"#{symbol}\"" \
           " binary=\"#{binary}\""
    short_element_xml_and_att_string("aminoacid_modification", atts)
  end

end
|
1146
|
+
|
1147
|
+
# Modified terminus (N- or C-terminal), static or variable
|
1148
|
+
class Sequest::PepXML::TerminalModification
  include SpecIDXML

  # n for N-terminus, c for C-terminus
  attr_accessor :terminus
  # Mass difference with respect to unmodified terminus (to_pepxml calls
  # to_plus_minus_string on this value)
  attr_accessor :massdiff
  # Mass of modified terminus
  attr_accessor :mass
  # Y if both modified and unmodified terminus could be present in the
  # dataset, N if only modified terminus can be present
  attr_accessor :variable
  # Special symbol used by search engine to designate this modification
  attr_accessor :symbol
  # whether modification can reside only at protein terminus (specified n or
  # c)
  attr_accessor :protein_terminus
  attr_accessor :description

  # hash:: attribute values, applied with instance_var_set_from_hash
  #        (can use unless there are weird methods)
  def initialize(hash=nil)
    return unless hash
    instance_var_set_from_hash(hash)
  end

  # Renders a short terminal_modification element carrying every attribute.
  def to_pepxml
    #short_element_xml_from_instance_vars("terminal_modification")
    atts = "terminus=\"#{terminus}\" massdiff=\"#{massdiff.to_plus_minus_string}\"" \
           " mass=\"#{mass}\" variable=\"#{variable}\" symbol=\"#{symbol}\"" \
           " protein_terminus=\"#{protein_terminus}\" description=\"#{description}\""
    short_element_xml_and_att_string("terminal_modification", atts)
  end
end
|
1176
|
+
|
1177
|
+
|
1178
|
+
# The database that was searched, rendered as a search_database element.
class Sequest::PepXML::SearchDatabase
  include SpecIDXML
  attr_accessor :local_path
  attr_writer :seq_type

  # Takes a SequestParams object
  # Sets :local_path from the params object attr :database
  # args:: optional hash handed to set_from_hash
  def initialize(params=nil, args=nil)
    @seq_type = nil
    @local_path = params.database if params
    set_from_hash(args) if args
  end

  # The explicitly set sequence type if there is one; otherwise 'AA' when
  # the local path contains ".fasta". Any other path aborts the program.
  def seq_type
    return @seq_type if @seq_type
    return 'AA' if @local_path =~ /\.fasta/
    abort "Don't recognize type from your database local path: #{@local_path}"
  end

  # Renders a short search_database element with the local path and type.
  def to_pepxml
    atts = "local_path=\"#{local_path}\" type=\"#{seq_type}\""
    short_element_xml_and_att_string(:search_database, atts)
  end

end
|
1208
|
+
|
1209
|
+
# 0=spectrum 1=start_scan 2=end_scan 3=precursor_neutral_mass 4=index 5=assumed_charge 6=search_results 7=pepxml_version
Sequest::PepXML::SpectrumQuery = Arrayclass.new(%w(spectrum start_scan end_scan precursor_neutral_mass index assumed_charge search_results pepxml_version))

class Sequest::PepXML::SpectrumQuery
  include SpecIDXML

  ############################################################
  # FOR PEPXML:
  ############################################################

  # Renders the spectrum_query element with the pepxml of every search
  # result inside. Only pepxml version 18 is handled; for any other
  # Sequest::PepXML.pepxml_version nil is returned.
  def to_pepxml
    case Sequest::PepXML.pepxml_version
    when 18
      element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
        search_results.collect { |sr| sr.to_pepxml }.join
      end
    end
  end

  # Builds a new object and fills it from the node (see the instance method).
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills the first six attributes from the node's attributes (scans, index
  # and charge as integers, precursor_neutral_mass as a float).
  # returns self
  def from_pepxml_node(node)
    self[0] = node['spectrum']
    self[1] = node['start_scan'].to_i
    self[2] = node['end_scan'].to_i
    self[3] = node['precursor_neutral_mass'].to_f
    self[4] = node['index'].to_i
    self[5] = node['assumed_charge'].to_i
    self
  end

  # Returns the precursor_neutral based on the scans and an array indexed by
  # scan numbers.  first and last scan and charge should be integers.
  # This is the precursor_mz - h_plus!
  # by=:prec_mz_arr|:deltamass
  # if prec_mz_arr then the following arguments must be supplied:
  # :first_scan = int, :last_scan = int, :prec_mz_arr = array with the precursor
  # m/z for each product scan, :charge = int
  # if deltamass then the following arguments must be supplied:
  # m_plus_h = float, deltamass = float
  # For both flavors, a final additional argument 'average_weights'
  # can be used.  If true (default), average weights will be used, if false,
  # monoisotopic weights (currently this is simply the mass of the proton)
  # Any other value of 'by' aborts the program.
  def self.calc_precursor_neutral_mass(by, *args)
    average_weights = nil
    case by
    when :prec_mz_arr
      (first_scan, last_scan, prec_mz_arr, charge, average_weights) = args
    when :deltamass
      (m_plus_h, deltamass, average_weights) = args
    end
    # The destructuring above yields nil when the optional trailing argument
    # is left out; only then does the documented default (true) apply. An
    # explicit false still selects monoisotopic weights.
    average_weights = true if average_weights.nil?

    mass_h_plus =
      if average_weights
        SpecID::AVG[:h_plus]
      else
        SpecID::MONO[:h_plus]
      end

    case by
    when :prec_mz_arr
      mz = nil
      if first_scan != last_scan
        # average the precursor m/z over the scans in range that have one
        sum = 0.0
        tot_num = 0
        (first_scan..last_scan).each do |scan|
          val = prec_mz_arr[scan]
          if val # if the scan is not an mslevel 2
            sum += val
            tot_num += 1
          end
        end
        mz = sum/tot_num
      else
        mz = prec_mz_arr[first_scan]
      end
      charge * (mz - mass_h_plus)
    when :deltamass
      m_plus_h - mass_h_plus + deltamass
    else
      abort "don't recognize 'by' in calc_precursor_neutral_mass: #{by}"
    end
  end

end
|
1293
|
+
|
1294
|
+
|
1295
|
+
Sequest::PepXML::SearchHit = Arrayclass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info spectrum_query) )

# Attribute positions:
# 0=hit_rank 1=peptide 2=peptide_prev_aa 3=peptide_next_aa 4=protein 5=num_tot_proteins 6=num_matched_ions 7=tot_num_ions 8=calc_neutral_pep_mass 9=massdiff 10=num_tol_term 11=num_missed_cleavages 12=is_rejected 13=deltacnstar 14=xcorr 15=deltacn 16=spscore 17=sprank 18=modification_info 19=spectrum_query

class Sequest::PepXML::SearchHit
  include SpecID::Pep
  include SpecIDXML

  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/

  # aaseq is another name for the peptide slot (index 1)
  def aaseq ; self[1] end
  def aaseq=(arg) ; self[1] = arg end

  # These are all search_score elements:

  # 1 if there is no second ranked hit, 0 otherwise

  # $VERBOSE is switched off while initialize is (re)defined and restored
  # straight afterwards.
  tmp_verb = $VERBOSE
  $VERBOSE = nil
  # hash:: optional attribute values keyed by attribute name (as symbols);
  #        attributes missing from the hash are set to hash[key] (nil for
  #        a plain Hash)
  def initialize(hash=nil)
    super(self.class.size)
    if hash
      keys = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :deltacnstar, :xcorr, :deltacn, :spscore, :sprank, :modification_info, :spectrum_query]
      self[0,20] = keys.map {|key| hash[key] }
    end
    self
  end
  $VERBOSE = tmp_verb

  undef_method :inspect
  # Lists every attribute as name:value inside "#<SearchHit ...>".
  def inspect
    pairs = @@attributes.map {|m| "#{m}:#{self.send(m)}" }
    var = pairs.join(" ")
    "#<SearchHit #{var}>"
  end

  # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
  def self.split_ions(ions)
    pieces = ions.split("/")
    pieces.map {|ion| ion.to_i }
  end

  # One search_score element (prefixed with tabs) whose value is the result
  # of sending symbol to this hit.
  def search_score_xml(symbol)
    "#{tabs}<search_score name=\"#{symbol}\" value=\"#{send(symbol)}\"/>"
  end

  # The search_score elements for every given symbol, one per line, with a
  # trailing newline.
  def search_scores_xml(*symbol_list)
    score_lines = symbol_list.map {|sy| search_score_xml(sy) }
    score_lines.join("\n") + "\n"
  end

  # Renders the search_hit element: the modification info (when slot 18 is
  # set) followed by the five search scores.
  def to_pepxml
    mod_pepxml = self[18] ? self[18].to_pepxml : ''

    #string = element_xml_and_att_string("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff_as_string, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
    # note the to_plus_minus_string
    #puts "MASSDIFF:"
    #p massdiff
    atts = "hit_rank=\"#{hit_rank}\" peptide=\"#{peptide}\"" \
           " peptide_prev_aa=\"#{peptide_prev_aa}\" peptide_next_aa=\"#{peptide_next_aa}\"" \
           " protein=\"#{protein}\" num_tot_proteins=\"#{num_tot_proteins}\"" \
           " num_matched_ions=\"#{num_matched_ions}\" tot_num_ions=\"#{tot_num_ions}\"" \
           " calc_neutral_pep_mass=\"#{calc_neutral_pep_mass}\"" \
           " massdiff=\"#{massdiff.to_plus_minus_string}\"" \
           " num_tol_term=\"#{num_tol_term}\" num_missed_cleavages=\"#{num_missed_cleavages}\"" \
           " is_rejected=\"#{is_rejected}\""
    element_xml_and_att_string("search_hit", atts) do
      mod_pepxml + search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
    end
  end

  # Fills slots 0..12 from the node's attributes (counts, ranks and flags
  # as integers, the two masses as floats, the rest as given).
  # returns self
  def from_pepxml_node(node)
    self[0, 13] = [
      node['hit_rank'].to_i,
      node['peptide'],
      node['peptide_prev_aa'],
      node['peptide_next_aa'],
      node['protein'],   ## will this be the string?? (yes, for now)
      node['num_tot_proteins'].to_i,
      node['num_matched_ions'].to_i,
      node['tot_num_ions'].to_i,
      node['calc_neutral_pep_mass'].to_f,
      node['massdiff'].to_f,
      node['num_tol_term'].to_i,
      node['num_missed_cleavages'].to_i,
      node['is_rejected'].to_i,
    ]
    self
  end

end
|
1380
|
+
|
1381
|
+
|
1382
|
+
Sequest::PepXML::SearchHit::ModificationInfo = Arrayclass.new(%w(modified_peptide mod_aminoacid_masses mod_nterm_mass mod_cterm_mass))

# Positions and masses of modifications
class Sequest::PepXML::SearchHit::ModificationInfo
  include SpecIDXML

  ## Should be something like this:
  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
  #   <mod_aminoacid_mass position=" " mass=" "/>
  # </modification_info>

  # masses is a shorter name for mod_aminoacid_masses
  alias_method :masses, :mod_aminoacid_masses
  alias_method :masses=, :mod_aminoacid_masses=

  # Mass of modified N terminus
  #attr_accessor :mod_nterm_mass
  # Mass of modified C terminus
  #attr_accessor :mod_cterm_mass
  # Peptide sequence (with indicated modifications)  I'm assuming that the
  # native sequest indicators are OK here
  #attr_accessor :modified_peptide

  # These are objects of type: ...ModAminoacidMass
  # position ranges from 1 to peptide length
  #attr_accessor :mod_aminoacid_masses

  # Renders the modification_info element with one mod_aminoacid_mass child
  # per entry in masses. Attributes that are not set are left out.
  # Will escape any xml special chars in modified_peptide
  def to_pepxml
    ## Collect the modifications:
    mod_strings =
      if masses and masses.size > 0
        masses.map {|ar| "position=\"#{ar[0]}\" mass=\"#{ar[1]}\"" }
      else
        []
      end
    ## Create the attribute string:
    att_parts = []
    att_parts << "mod_nterm_mass=\"#{mod_nterm_mass}\"" if mod_nterm_mass
    att_parts << "mod_cterm_mass=\"#{mod_cterm_mass}\"" if mod_cterm_mass
    att_parts << "modified_peptide=\"#{escape_special_chars(modified_peptide)}\"" if modified_peptide
    element_xml_and_att_string('modification_info', att_parts.join(" ")) do
      mod_strings.map {|st| short_element_xml_and_att_string('mod_aminoacid_mass', st) }.join
    end
  end

  # Builds a new object and fills it from the node (see the instance method).
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills the object from the node's attributes and children.
  # returns self
  def from_pepxml_node(node)
    self[0] = node['modified_peptide']
    self[2] = node['mod_nterm_mass']
    self[3] = node['mod_cterm_mass']
    collected = []
    # NOTE(review): this relies on node.children yielding each child to the
    # block; if children merely returns a collection the block never runs
    # and the masses stay empty -- confirm against the xml library in use.
    node.children do |mass_n|
      collected << Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new([mass_n['position'].to_i, mass_n['mass'].to_f])
    end
    self[1] = collected
    self
  end

  ##

  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
  # </modification_info>
end

# 0=position 1=mass
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = Arrayclass.new(%w(position mass))
|