mspire 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/lib/spec_id/sequest.rb
CHANGED
@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
|
|
7
7
|
require 'instance_var_set_from_hash'
|
8
8
|
require 'spec/msrun'
|
9
9
|
require 'spec_id/srf'
|
10
|
+
require 'fileutils'
|
10
11
|
|
11
12
|
class Numeric
|
12
13
|
# returns a string with a + or - on the front
|
@@ -75,10 +76,10 @@ end
|
|
75
76
|
|
76
77
|
|
77
78
|
|
78
|
-
module
|
79
|
-
class
|
79
|
+
module Sequest; end
|
80
|
+
class Sequest::PepXML; end
|
80
81
|
|
81
|
-
class
|
82
|
+
class Sequest::PepXML::MSMSPipelineAnalysis
|
82
83
|
include SpecIDXML
|
83
84
|
# Version 1.2.3
|
84
85
|
attr_writer :date
|
@@ -106,7 +107,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
|
|
106
107
|
def date
|
107
108
|
if @date ; @date
|
108
109
|
else
|
109
|
-
case
|
110
|
+
case Sequest::PepXML.pepxml_version
|
110
111
|
when 18 ; tarr = Time.now.to_a ; tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
|
111
112
|
when 0 ; Time.new.to_s
|
112
113
|
end
|
@@ -132,7 +133,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
|
|
132
133
|
end
|
133
134
|
|
134
135
|
def to_pepxml
|
135
|
-
case
|
136
|
+
case Sequest::PepXML.pepxml_version
|
136
137
|
when 0
|
137
138
|
element_xml(:msms_pipeline_analysis, [:date, :summary_xml]) do
|
138
139
|
@msms_run_summary.to_pepxml
|
@@ -142,13 +143,13 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
|
|
142
143
|
@msms_run_summary.to_pepxml
|
143
144
|
end
|
144
145
|
else
|
145
|
-
abort "Don't know how to deal with version: #{
|
146
|
+
abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
|
146
147
|
end
|
147
148
|
end
|
148
149
|
|
149
150
|
end
|
150
151
|
|
151
|
-
class
|
152
|
+
class Sequest::PepXML::MSMSRunSummary
|
152
153
|
include SpecIDXML
|
153
154
|
|
154
155
|
# the version of TPP you are using (determines xml output)
|
@@ -184,7 +185,7 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
|
|
184
185
|
end
|
185
186
|
|
186
187
|
def to_pepxml
|
187
|
-
case
|
188
|
+
case Sequest::PepXML.pepxml_version
|
188
189
|
when 18
|
189
190
|
element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
|
190
191
|
sample_enzyme.to_pepxml +
|
@@ -210,7 +211,7 @@ end
|
|
210
211
|
|
211
212
|
|
212
213
|
|
213
|
-
class
|
214
|
+
class Sequest::PepXML
|
214
215
|
include SpecIDXML
|
215
216
|
|
216
217
|
## CREATE a default version for the entire class
|
@@ -292,21 +293,22 @@ class SpecID::Sequest::PepXML
|
|
292
293
|
# objects. Ideally, we'd like these attributes to reside elsewhere, but for
|
293
294
|
# memory concerns, this is best for now.
|
294
295
|
def self._prot_num_and_first_prot_by_pep(pep_array)
|
295
|
-
pep_array.hash_by(:
|
296
|
-
|
297
|
-
prots
|
298
|
-
|
296
|
+
pep_array.hash_by(:aaseq).each do |aasq, pep_arr|
|
297
|
+
prts = []
|
298
|
+
pep_arr.each { |pep| prts.push( *(pep.prots) ) }
|
299
|
+
prts.uniq!
|
300
|
+
_size = prts.size
|
299
301
|
pep_arr.each do |pep|
|
300
302
|
pep._num_prots = _size.to_s
|
301
|
-
pep._first_prot =
|
303
|
+
pep._first_prot = prts.first
|
302
304
|
end
|
303
305
|
end
|
304
306
|
end
|
305
307
|
|
306
308
|
|
307
|
-
|
308
|
-
:out_path =>
|
309
|
-
|
309
|
+
Default_Options = {
|
310
|
+
:out_path => '.',
|
311
|
+
#:backup_db_path => '.',
|
310
312
|
# a PepXML option
|
311
313
|
:pepxml_version => DEF_VERSION,
|
312
314
|
## MSMSRunSummary options:
|
@@ -314,15 +316,18 @@ class SpecID::Sequest::PepXML
|
|
314
316
|
# or create your own SampleEnzyme object
|
315
317
|
:sample_enzyme => 'trypsin',
|
316
318
|
:ms_manufacturer => 'ThermoFinnigan',
|
317
|
-
:ms_model => 'LCQ Deca XP',
|
319
|
+
:ms_model => 'LCQ Deca XP Plus',
|
318
320
|
:ms_ionization => 'ESI',
|
319
321
|
:ms_mass_analyzer => 'Ion Trap',
|
320
322
|
:ms_detector => 'UNKNOWN',
|
323
|
+
:ms_data => '.', # path to ms data files (raw or mzxml)
|
321
324
|
:raw_data_type => "raw",
|
322
325
|
:raw_data => ".mzXML", ## even if you don't have it?
|
323
326
|
## SearchSummary options:
|
324
327
|
:out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
|
325
|
-
:out_data => ".tgz" ## may be srf??
|
328
|
+
:out_data => ".tgz", ## may be srf??
|
329
|
+
:copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
|
330
|
+
:print => false, # print the objects to file
|
326
331
|
}
|
327
332
|
|
328
333
|
# will dynamically set :ms_model and :ms_mass_analyzer from srf info
|
@@ -330,23 +335,23 @@ class SpecID::Sequest::PepXML
|
|
330
335
|
# and LCQ Deca XP
|
331
336
|
# See SRF::Sequest::PepXML::Default_Options hash for defaults
|
332
337
|
# unless given, the out_path will be given as the path of the srf_file
|
333
|
-
|
338
|
+
# srf may be an object or a filename
|
339
|
+
def self.new_from_srf(srf, opts={})
|
334
340
|
opts = Default_Options.merge(opts)
|
335
341
|
|
336
|
-
##
|
337
|
-
|
338
|
-
|
339
|
-
out_path = File.dirname(srf_file)
|
342
|
+
## read the srf file
|
343
|
+
if srf.is_a? String
|
344
|
+
srf = SRF.new(srf)
|
340
345
|
end
|
341
346
|
|
342
|
-
##
|
343
|
-
|
347
|
+
## set the outpath
|
348
|
+
out_path = opts.delete(:out_path)
|
344
349
|
|
345
350
|
params = srf.params
|
346
351
|
|
347
352
|
## check to see if we need backup_db
|
348
353
|
backup_db_path = opts.delete(:backup_db_path)
|
349
|
-
|
354
|
+
if !File.exist?(params.database) && backup_db_path
|
350
355
|
params.database_path = backup_db_path
|
351
356
|
end
|
352
357
|
|
@@ -374,24 +379,47 @@ class SpecID::Sequest::PepXML
|
|
374
379
|
|
375
380
|
## Create the search summary:
|
376
381
|
search_summary_options = {
|
377
|
-
:search_database =>
|
382
|
+
:search_database => Sequest::PepXML::SearchDatabase.new(params),
|
378
383
|
:base_name => full_base_name_no_ext,
|
379
384
|
:out_data_type => out_data_type,
|
380
385
|
:out_data => out_data
|
381
386
|
}
|
382
|
-
|
387
|
+
modifications_string = srf.header.modifications
|
388
|
+
search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
|
383
389
|
|
384
390
|
## Create the SampleEnzyme object if necessary
|
385
391
|
unless opts[:sample_enzyme].is_a? SampleEnzyme
|
386
392
|
opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
|
387
393
|
end
|
388
394
|
|
389
|
-
## Create the pepxml obj
|
390
|
-
pepxml_obj =
|
395
|
+
## Create the pepxml obj and top level objects
|
396
|
+
pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
|
397
|
+
pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
|
398
|
+
pepxml_obj.msms_pipeline_analysis = pipeline
|
399
|
+
pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
|
400
|
+
pipeline.msms_run_summary.search_summary = search_summary
|
401
|
+
modifications_obj = search_summary.modifications
|
402
|
+
|
391
403
|
## name some common variables we'll need
|
392
404
|
h_plus = pepxml_obj.h_plus
|
393
405
|
avg_parent = pepxml_obj.avg_parent
|
394
406
|
|
407
|
+
|
408
|
+
## COPY MZXML FILES IF NECESSARY
|
409
|
+
if opts[:copy_mzxml]
|
410
|
+
mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
|
411
|
+
to_copy = Spec::MzXML.file_to_mzxml(mzxml_pathname_noext)
|
412
|
+
if to_copy
|
413
|
+
FileUtils.cp to_copy, out_path
|
414
|
+
else
|
415
|
+
puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
|
416
|
+
puts "Perhaps you need to specifiy the location of the raw data"
|
417
|
+
puts "or need an mzXML converter (readw.exe or t2x)"
|
418
|
+
exit
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
|
395
423
|
#######################################################################
|
396
424
|
# CREATE the spectrum_queries_ar
|
397
425
|
#######################################################################
|
@@ -420,6 +448,8 @@ class SpecID::Sequest::PepXML
|
|
420
448
|
deltacnstar = '1'
|
421
449
|
end
|
422
450
|
|
451
|
+
|
452
|
+
|
423
453
|
## mass calculations:
|
424
454
|
precursor_neutral_mass = dta_file.mh - h_plus
|
425
455
|
calc_neutral_pep_mass = top_hit[0] - h_plus
|
@@ -428,6 +458,9 @@ class SpecID::Sequest::PepXML
|
|
428
458
|
else ; massdiff = massdiff.to_s end
|
429
459
|
|
430
460
|
(start_scan, end_scan, charge) = srf_index[i]
|
461
|
+
|
462
|
+
|
463
|
+
|
431
464
|
sq_hash = {
|
432
465
|
:spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
|
433
466
|
:start_scan => start_scan,
|
@@ -438,9 +471,13 @@ class SpecID::Sequest::PepXML
|
|
438
471
|
:index => files_with_hits_index,
|
439
472
|
}
|
440
473
|
|
474
|
+
spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
|
475
|
+
|
476
|
+
sequence = top_hit[8]
|
477
|
+
|
441
478
|
# NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
|
442
479
|
## THIS IS ALL INNER LOOP, so we make every effort at speed here:
|
443
|
-
(prevaa, pepseq, nextaa) = SpecID::
|
480
|
+
(prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
|
444
481
|
# ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
|
445
482
|
|
446
483
|
sh_hash = {
|
@@ -448,14 +485,14 @@ class SpecID::Sequest::PepXML
|
|
448
485
|
:peptide => pepseq,
|
449
486
|
:peptide_prev_aa => prevaa,
|
450
487
|
:peptide_next_aa => nextaa,
|
451
|
-
:protein => top_hit[9].split(" ").first,
|
452
|
-
:num_tot_proteins => top_hit[
|
488
|
+
:protein => top_hit[9].first.reference.split(" ").first,
|
489
|
+
:num_tot_proteins => top_hit[9].size,
|
453
490
|
:num_matched_ions => top_hit[6],
|
454
491
|
:tot_num_ions => top_hit[7],
|
455
492
|
:calc_neutral_pep_mass => calc_neutral_pep_mass,
|
456
493
|
:massdiff => massdiff,
|
457
|
-
:num_tol_term =>
|
458
|
-
:num_missed_cleavages =>
|
494
|
+
:num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
|
495
|
+
:num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
|
459
496
|
:is_rejected => '0',
|
460
497
|
# These are search score attributes:
|
461
498
|
:xcorr => top_hit[3],
|
@@ -463,51 +500,88 @@ class SpecID::Sequest::PepXML
|
|
463
500
|
:deltacnstar => deltacnstar,
|
464
501
|
:spscore => top_hit[2],
|
465
502
|
:sprank => top_hit[5],
|
503
|
+
:modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
|
466
504
|
}
|
505
|
+
search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
|
467
506
|
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
[search_result] # can be multiple
|
473
|
-
end
|
507
|
+
search_result = Sequest::PepXML::SearchResult.new
|
508
|
+
search_result.search_hits = [search_hit]
|
509
|
+
spectrum_query.search_results = [search_result]
|
510
|
+
spectrum_queries_arr[files_with_hits_index] = spectrum_query
|
474
511
|
end
|
475
512
|
spectrum_queries_arr.compact!
|
476
513
|
|
477
|
-
|
478
|
-
# ADD the pipeline analysis
|
479
|
-
#######################################################################
|
480
|
-
|
481
|
-
pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'}) do
|
482
|
-
SpecID::Sequest::PepXML::MSMSRunSummary.new(opts) { spectrum_queries_arr }
|
483
|
-
end
|
484
|
-
pepxml_obj.msms_pipeline_analysis = pipeline
|
514
|
+
pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
|
485
515
|
pepxml_obj.base_name = pipeline.msms_run_summary.base_name
|
516
|
+
pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
|
517
|
+
|
486
518
|
pepxml_obj
|
487
519
|
end
|
488
520
|
|
521
|
+
# takes an .srg or bioworks.xml file
|
522
|
+
# if possible, ensures that an mzXML file is present for each pepxml file
|
523
|
+
# :print => true, will print files
|
524
|
+
def self.set_from_bioworks(bioworks_file, opts={})
|
525
|
+
opts = Default_Options.merge(opts)
|
526
|
+
## Create the out_path directory if necessary
|
527
|
+
|
528
|
+
unless File.exist? opts[:out_path]
|
529
|
+
FileUtils.mkpath(opts[:out_path])
|
530
|
+
end
|
531
|
+
unless File.directory? opts[:out_path]
|
532
|
+
abort "#{opts[:out_path]} must be a directory!"
|
533
|
+
end
|
534
|
+
|
535
|
+
spec_id = SpecID.new(bioworks_file)
|
536
|
+
pepxml_objs =
|
537
|
+
if spec_id.is_a? Bioworks
|
538
|
+
abort("must have opts[:params] set!") unless opts[:params]
|
539
|
+
set_from_bioworks_xml(bioworks_file, opts[:params], opts)
|
540
|
+
elsif spec_id.is_a? SRFGroup
|
541
|
+
spec_id.srfs.map do |srf|
|
542
|
+
new_from_srf(srf, opts)
|
543
|
+
end
|
544
|
+
else
|
545
|
+
abort "invalid object"
|
546
|
+
end
|
547
|
+
|
548
|
+
if opts[:print]
|
549
|
+
pepxml_objs.each do |obj|
|
550
|
+
obj.to_pepxml(obj.base_name + ".xml")
|
551
|
+
end
|
552
|
+
end
|
553
|
+
pepxml_objs
|
554
|
+
end
|
555
|
+
|
556
|
+
|
489
557
|
# Takes bioworks 3.2/3.3 xml output (with no filters)
|
490
558
|
# Returns a list of PepXML objects
|
491
|
-
# msdata = path to mzXML files (or .timeIndex files) (or @TODO: path to sqt file(s))
|
492
559
|
# params = sequest.params file
|
493
560
|
# bioworks = bioworks.xml exported multi-consensus view file
|
494
561
|
# pepxml_version = 0 for tpp 1.2.3
|
495
562
|
# pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
|
496
|
-
def self.
|
563
|
+
def self.set_from_bioworks_xml(bioworks, params, opts={})
|
564
|
+
opts = Default_Options.merge(opts)
|
565
|
+
pepxml_version, sample_enzyme, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)
|
566
|
+
|
567
|
+
unless out_path
|
568
|
+
out_path = '.'
|
569
|
+
end
|
570
|
+
|
497
571
|
supported_versions = [0,18]
|
498
572
|
|
499
|
-
unless supported_versions.include?(pepxml_version)
|
573
|
+
unless supported_versions.include?(opts[:pepxml_version])
|
500
574
|
abort "pepxml_version: #{pepxml_version} not currently supported. Current support is for versions #{supported_versions.join(', ')}"
|
501
575
|
end
|
502
576
|
|
503
577
|
## Turn params and bioworks_obj into objects if necessary:
|
504
578
|
# Params:
|
505
|
-
if params.class ==
|
506
|
-
elsif params.class == String ; params =
|
579
|
+
if params.class == Sequest::Params # OK!
|
580
|
+
elsif params.class == String ; params = Sequest::Params.new(params)
|
507
581
|
else ; abort "Don't recognize #{params} as object or string!"
|
508
582
|
end
|
509
583
|
# Bioworks:
|
510
|
-
if bioworks.class ==
|
584
|
+
if bioworks.class == Bioworks # OK!
|
511
585
|
elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
|
512
586
|
else ; abort "Don't recognize #{bioworks} as object or string!"
|
513
587
|
end
|
@@ -516,39 +590,98 @@ class SpecID::Sequest::PepXML
|
|
516
590
|
|
517
591
|
## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
|
518
592
|
## bioworks
|
519
|
-
#bioworks.peps.each { |pep| if pep.class !=
|
593
|
+
#bioworks.peps.each { |pep| if pep.class != Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
|
520
594
|
|
595
|
+
## check to see if we need backup_db
|
596
|
+
|
597
|
+
backup_db_path = opts.delete(:backup_db_path)
|
598
|
+
if !File.exist?(params.database) && backup_db_path
|
599
|
+
params.database_path = backup_db_path
|
600
|
+
end
|
521
601
|
|
522
602
|
## Start
|
523
603
|
split_bio_objs = []
|
524
604
|
|
525
|
-
## Create a hash by pep object containing num_tot_proteins
|
526
|
-
## This is only valid if all hits are present (no previous thresholding)
|
527
|
-
self._prot_num_and_first_prot_by_pep(bioworks.peps)
|
528
605
|
## (num_prots_by_pep, prot_by_pep) =
|
529
606
|
#num_prots_by_pep.each do |k,v| puts "k: #{k} v: #{v}\n"; break end ; prot_by_pep.each do |k,v| puts "k: #{k} v: #{v}" ; break end ; abort "HERE"
|
530
607
|
|
608
|
+
modifications_string = bioworks.modifications
|
609
|
+
search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
|
610
|
+
modifications_obj = search_summary.modifications
|
611
|
+
|
531
612
|
## Create a hash of spectrum_query arrays by filename (this very big block):
|
532
613
|
spectrum_queries_by_base_name = {}
|
533
|
-
pepxml_objs_by_base_name = {}
|
534
614
|
# Hash by the filenames to split into filenames:
|
535
|
-
bioworks.peps.hash_by(:base_name).
|
615
|
+
bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|
|
536
616
|
|
617
|
+
pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
|
618
|
+
full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
|
619
|
+
|
620
|
+
case pepxml_version
|
621
|
+
when 18
|
622
|
+
pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
|
623
|
+
msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
|
624
|
+
:base_name => full_base_name_no_ext,
|
625
|
+
:ms_manufacturer => ms_manufacturer,
|
626
|
+
:ms_model => ms_model,
|
627
|
+
:ms_ionization => ms_ionization,
|
628
|
+
:ms_mass_analyzer => ms_mass_analyzer,
|
629
|
+
:ms_detector => ms_detector,
|
630
|
+
:raw_data_type => raw_data_type,
|
631
|
+
:raw_data => raw_data,
|
632
|
+
:sample_enzyme => SampleEnzyme.new(sample_enzyme),
|
633
|
+
:search_summary => search_summary,
|
634
|
+
})
|
635
|
+
pipeline.msms_run_summary = msms_run_summary
|
636
|
+
pepxml_obj.msms_pipeline_analysis = pipeline
|
637
|
+
pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name = full_base_name_no_ext
|
638
|
+
pepxml_obj.base_name = full_base_name_no_ext
|
639
|
+
pepxml_obj
|
640
|
+
when 0
|
641
|
+
## @TODO: NEED TO REVAMP THIS:
|
642
|
+
# Sequest::PepXML.new(pepxml_version).set_from_hash({
|
643
|
+
# :params => params,
|
644
|
+
# :search_results => spectrum_queries_arr,
|
645
|
+
# :base_name => self.make_base_name( File.expand_path(out_path), base_name),
|
646
|
+
# :search_engine => params.search_engine,
|
647
|
+
# :database => params.database,
|
648
|
+
# :raw_data_type => "mzXML",
|
649
|
+
# :raw_data => ".mzXML",
|
650
|
+
# :out_data_type => "out",
|
651
|
+
# :out_data => ".tgz",
|
652
|
+
# :sample_enzyme => params.enzyme,
|
653
|
+
# })
|
654
|
+
end
|
655
|
+
|
656
|
+
|
657
|
+
|
658
|
+
|
659
|
+
|
660
|
+
# Create a hash by pep object containing num_tot_proteins
|
661
|
+
# This is only valid if all hits are present (no previous thresholding)
|
662
|
+
# Since out2summary only acts on one folder at a time,
|
663
|
+
# we should only do it for one folder at a time! (that's why we do this
|
664
|
+
# here instead of globally)
|
665
|
+
self._prot_num_and_first_prot_by_pep(pep_arr)
|
537
666
|
prec_mz_arr = nil
|
538
667
|
case x = bioworks.version
|
539
668
|
when /3.2/
|
540
669
|
calc_prec_by = :prec_mz_arr
|
541
670
|
# get the precursor_mz array for this filename
|
542
|
-
|
543
|
-
prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(inner__full_base_name_no_ext)
|
671
|
+
prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(File.join(ms_data, base_name))
|
544
672
|
when /3.3/
|
545
673
|
calc_prec_by = :deltamass
|
546
674
|
else
|
547
675
|
abort "invalid BioworksBrowser version: #{x}"
|
548
676
|
end
|
549
677
|
|
550
|
-
|
551
|
-
|
678
|
+
if opts[:copy_mzxml]
|
679
|
+
to_copy = Spec::MzXML.file_to_mzxml(File.join(ms_data, base_name))
|
680
|
+
if to_copy
|
681
|
+
FileUtils.cp to_copy, out_path
|
682
|
+
end
|
683
|
+
end
|
684
|
+
|
552
685
|
|
553
686
|
spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
|
554
687
|
|
@@ -561,9 +694,9 @@ class SpecID::Sequest::PepXML
|
|
561
694
|
|
562
695
|
case calc_prec_by
|
563
696
|
when :prec_mz_arr
|
564
|
-
precursor_neutral_mass =
|
697
|
+
precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
|
565
698
|
when :deltamass
|
566
|
-
precursor_neutral_mass =
|
699
|
+
precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
|
567
700
|
end
|
568
701
|
|
569
702
|
calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
|
@@ -581,98 +714,58 @@ class SpecID::Sequest::PepXML
|
|
581
714
|
end
|
582
715
|
# Create the nested structure of queries{results{hits}}
|
583
716
|
# (Ruby's blocks work beautifully for things like this)
|
584
|
-
spec_query =
|
717
|
+
spec_query = Sequest::PepXML::SpectrumQuery.new({
|
585
718
|
:spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
|
586
719
|
:start_scan => top_pep.first_scan,
|
587
720
|
:end_scan => top_pep.last_scan,
|
588
721
|
:precursor_neutral_mass => precursor_neutral_mass.to_s,
|
589
722
|
:assumed_charge => top_pep.charge,
|
590
723
|
:pepxml_version => pepxml_version,
|
591
|
-
})
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
724
|
+
})
|
725
|
+
|
726
|
+
|
727
|
+
search_result = Sequest::PepXML::SearchResult.new
|
728
|
+
|
729
|
+
## Calculate some interdependent values;
|
730
|
+
# NOTE: the bioworks mass is reallyf M+H if two or more scans went
|
731
|
+
# into the search_hit; calc_neutral_pep_mass is simply the avg of
|
732
|
+
# precursor masses adjusted to be neutral
|
733
|
+
(prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
|
734
|
+
(num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
|
735
|
+
search_hit = Sequest::PepXML::SearchHit.new({
|
736
|
+
:hit_rank => "1",
|
737
|
+
:peptide => pepseq,
|
738
|
+
:peptide_prev_aa => prevaa,
|
739
|
+
:peptide_next_aa => nextaa,
|
740
|
+
:protein => top_pep._first_prot.reference.split(" ").first,
|
741
|
+
:num_tot_proteins => top_pep._num_prots,
|
742
|
+
:num_matched_ions => num_matched_ions,
|
743
|
+
:tot_num_ions => tot_num_ions,
|
744
|
+
:calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
|
745
|
+
:massdiff => massdiff,
|
746
|
+
:num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
|
747
|
+
:num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
|
748
|
+
:is_rejected => "0",
|
749
|
+
# These are search score attributes:
|
750
|
+
:xcorr => top_pep.xcorr,
|
751
|
+
:deltacn => top_pep.deltacn,
|
752
|
+
:deltacnstar => deltacnstar,
|
753
|
+
:spscore => top_pep.sp,
|
754
|
+
:sprank => top_pep.rsp,
|
755
|
+
:modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
|
756
|
+
})
|
757
|
+
search_result.search_hits = [search_hit] # there can be multiple search hits
|
758
|
+
spec_query.search_results = [search_result] # can be multiple search_results
|
759
|
+
spec_query
|
760
|
+
end
|
626
761
|
|
627
762
|
# create an index by spectrum as results end up typically in out2summary
|
628
763
|
# (I really dislike this order, however)
|
629
764
|
spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
|
630
765
|
spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
|
631
|
-
|
632
|
-
|
633
|
-
end
|
634
|
-
|
635
|
-
modifications_string = bioworks.modifications
|
636
|
-
|
637
|
-
spectrum_queries_by_base_name.collect do |base_name, spectrum_queries_ar|
|
638
|
-
case pepxml_version
|
639
|
-
when 18
|
640
|
-
pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
|
641
|
-
full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
|
642
|
-
SpecID::Sequest::PepXML::MSMSRunSummary.new({
|
643
|
-
:base_name => full_base_name_no_ext,
|
644
|
-
:ms_manufacturer => ms_manufacturer,
|
645
|
-
:ms_model => ms_model,
|
646
|
-
:ms_ionization => ms_ionization,
|
647
|
-
:ms_mass_analyzer => ms_mass_analyzer,
|
648
|
-
:ms_detector => ms_detector,
|
649
|
-
:raw_data_type => raw_data_type,
|
650
|
-
:raw_data => raw_data,
|
651
|
-
:sample_enzyme => SampleEnzyme.new(sample_enzyme),
|
652
|
-
:search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
|
653
|
-
}) { spectrum_queries_ar }
|
654
|
-
end
|
655
|
-
pepxml_obj = pepxml_objs_by_base_name[base_name]
|
656
|
-
pepxml_obj.msms_pipeline_analysis = pipeline
|
657
|
-
pepxml_obj.base_name = pipeline.msms_run_summary.base_name
|
658
|
-
pepxml_obj
|
659
|
-
when 0
|
660
|
-
## @TODO: NEED TO REVAMP THIS:
|
661
|
-
# SpecID::Sequest::PepXML.new(pepxml_version).set_from_hash({
|
662
|
-
# :params => params,
|
663
|
-
# :search_results => spectrum_queries_arr,
|
664
|
-
# :base_name => self.make_base_name( File.expand_path(out_path), base_name),
|
665
|
-
# :search_engine => params.search_engine,
|
666
|
-
# :database => params.database,
|
667
|
-
# :raw_data_type => "mzXML",
|
668
|
-
# :raw_data => ".mzXML",
|
669
|
-
# :out_data_type => "out",
|
670
|
-
# :out_data => ".tgz",
|
671
|
-
# :sample_enzyme => params.enzyme,
|
672
|
-
# })
|
673
|
-
end
|
674
|
-
end # collects the pepxml objects
|
675
|
-
|
766
|
+
pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
|
767
|
+
pepxml_obj
|
768
|
+
end ## collects pepxml_objs
|
676
769
|
end
|
677
770
|
|
678
771
|
def summary_xml
|
@@ -724,7 +817,7 @@ end # PepXML
|
|
724
817
|
##
|
725
818
|
# In the future, this guy should accept any version of bioworks params file
|
726
819
|
# and spit out any param queried.
|
727
|
-
class
|
820
|
+
class Sequest::Params
|
728
821
|
include SpecIDXML
|
729
822
|
|
730
823
|
# current attributes supported are:
|
@@ -941,7 +1034,7 @@ class SpecID::Sequest::Params
|
|
941
1034
|
|
942
1035
|
end
|
943
1036
|
|
944
|
-
class
|
1037
|
+
class Sequest::PepXML::SearchResult
|
945
1038
|
include SpecIDXML
|
946
1039
|
# an array of search_hits
|
947
1040
|
attr_accessor :search_hits
|
@@ -959,7 +1052,7 @@ class SpecID::Sequest::PepXML::SearchResult
|
|
959
1052
|
end
|
960
1053
|
end
|
961
1054
|
|
962
|
-
class
|
1055
|
+
class Sequest::PepXML::SearchSummary
|
963
1056
|
include SpecIDXML
|
964
1057
|
attr_accessor :params
|
965
1058
|
attr_accessor :base_name
|
@@ -974,7 +1067,7 @@ class SpecID::Sequest::PepXML::SearchSummary
|
|
974
1067
|
def initialize(params, modifications_string='', args=nil)
|
975
1068
|
@search_id = nil
|
976
1069
|
@params = params
|
977
|
-
@modifications =
|
1070
|
+
@modifications = Sequest::PepXML::Modifications.new(params, modifications_string)
|
978
1071
|
if args ; set_from_hash(args) end
|
979
1072
|
end
|
980
1073
|
|
@@ -999,7 +1092,7 @@ class SpecID::Sequest::PepXML::SearchSummary
|
|
999
1092
|
|
1000
1093
|
end
|
1001
1094
|
|
1002
|
-
class
|
1095
|
+
class Sequest::PepXML::Modifications
|
1003
1096
|
include SpecIDXML
|
1004
1097
|
|
1005
1098
|
# sequest params object
|
@@ -1032,20 +1125,27 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1032
1125
|
|
1033
1126
|
# set the masses_by_diff_mod and mod_symbols_hash from
|
1034
1127
|
def set_hashes(modification_symbols_string)
|
1128
|
+
|
1035
1129
|
@mod_symbols_hash = {}
|
1036
1130
|
@masses_by_diff_mod = {}
|
1037
|
-
if modification_symbols_string == nil || modification_symbols_string == ''
|
1131
|
+
if (modification_symbols_string == nil || modification_symbols_string == '')
|
1038
1132
|
return nil
|
1039
1133
|
end
|
1040
1134
|
table = @params.mass_table
|
1041
1135
|
modification_symbols_string.split(/\)\s+\(/).each do |mod|
|
1042
|
-
if mod =~ /\(?(\w
|
1043
|
-
aa_as_sym = $1.to_sym,
|
1044
|
-
@mod_symbols_hash[[aa_as_sym, $3.to_f]] = $2.dup
|
1136
|
+
if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
|
1045
1137
|
if $1 == 'ct' || $1 == 'nt'
|
1046
|
-
|
1138
|
+
mass_diff = $3.to_f
|
1139
|
+
@masses_by_diff_mod[$2] = mass_diff
|
1140
|
+
@mod_symbols_hash[[$1, mass_diff]] = $2.dup
|
1047
1141
|
else
|
1048
|
-
|
1142
|
+
symbol_string = $2.dup
|
1143
|
+
mass_diff = $3.to_f
|
1144
|
+
$1.split('').each do |aa|
|
1145
|
+
aa_as_sym = aa.to_sym
|
1146
|
+
@masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
|
1147
|
+
@mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
|
1148
|
+
end
|
1049
1149
|
end
|
1050
1150
|
end
|
1051
1151
|
end
|
@@ -1058,8 +1158,8 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1058
1158
|
if @masses_by_diff_mod.size == 0
|
1059
1159
|
return nil
|
1060
1160
|
end
|
1061
|
-
hash[:modified_peptide] = peptide.dup
|
1062
1161
|
hash = {}
|
1162
|
+
hash[:modified_peptide] = peptide.dup
|
1063
1163
|
hsh = @masses_by_diff_mod
|
1064
1164
|
table = @params.mass_table
|
1065
1165
|
h = table[:h] # this? or h_plus ??
|
@@ -1068,12 +1168,13 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1068
1168
|
if hsh.key? peptide[0,1]
|
1069
1169
|
# AA + H + differential_mod
|
1070
1170
|
hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
|
1071
|
-
peptide
|
1171
|
+
peptide = peptide[1...(peptide.size)]
|
1072
1172
|
end
|
1073
|
-
if hsh.key? peptide[-1,1]
|
1173
|
+
if hsh.key? peptide[(peptide.size-1),1]
|
1074
1174
|
# AA + OH + differential_mod
|
1075
|
-
hash[:mod_cterm_mass] = table[peptide[-2,1].to_sym] + oh + hsh[peptide[-1,1]]
|
1175
|
+
hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
|
1076
1176
|
peptide.slice!( 0..-2 )
|
1177
|
+
peptide = peptide[0...(peptide.size-1)]
|
1077
1178
|
end
|
1078
1179
|
mod_array = []
|
1079
1180
|
(0...peptide.size).each do |i|
|
@@ -1084,8 +1185,8 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1084
1185
|
if mod_array.size > 0
|
1085
1186
|
hash[:mod_aminoacid_mass_array] = mod_array
|
1086
1187
|
end
|
1087
|
-
if hash.size >
|
1088
|
-
|
1188
|
+
if hash.size > 1 # if there is more than just the modified peptide there
|
1189
|
+
Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
|
1089
1190
|
else
|
1090
1191
|
nil
|
1091
1192
|
end
|
@@ -1127,7 +1228,7 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1127
1228
|
:variable => 'N',
|
1128
1229
|
:binary => 'Y',
|
1129
1230
|
}
|
1130
|
-
|
1231
|
+
Sequest::PepXML::AAModification.new(hash)
|
1131
1232
|
end
|
1132
1233
|
|
1133
1234
|
## Create the static_terminal_mods objects
|
@@ -1149,7 +1250,7 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1149
1250
|
:description => mod[0],
|
1150
1251
|
}
|
1151
1252
|
hash[:protein_terminus] = protein_terminus if protein_terminus
|
1152
|
-
|
1253
|
+
Sequest::PepXML::TerminalModification.new(hash)
|
1153
1254
|
end
|
1154
1255
|
#################################
|
1155
1256
|
# Variable Mods:
|
@@ -1159,20 +1260,25 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1159
1260
|
variable_mods = []
|
1160
1261
|
(0...arr.size).step(2) do |i|
|
1161
1262
|
if arr[i].to_f != 0.0
|
1162
|
-
variable_mods << [arr[i+1]
|
1263
|
+
variable_mods << [arr[i+1], arr[i].to_f]
|
1163
1264
|
end
|
1164
1265
|
end
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1266
|
+
mod_objects = []
|
1267
|
+
variable_mods.each do |mod|
|
1268
|
+
mod[0].split('').each do |aa|
|
1269
|
+
hash = {
|
1270
|
+
|
1271
|
+
:aminoacid => aa,
|
1272
|
+
:massdiff => mod[1].to_plus_minus_string,
|
1273
|
+
:mass => aa_hash[aa.to_sym] + mod[1],
|
1274
|
+
:variable => 'Y',
|
1275
|
+
:binary => 'N',
|
1276
|
+
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
1277
|
+
}
|
1278
|
+
mod_objects << Sequest::PepXML::AAModification.new(hash)
|
1279
|
+
end
|
1175
1280
|
end
|
1281
|
+
variable_mods = mod_objects
|
1176
1282
|
#################################
|
1177
1283
|
# TERMINAL Variable Mods:
|
1178
1284
|
#################################
|
@@ -1194,7 +1300,7 @@ class SpecID::Sequest::PepXML::Modifications
|
|
1194
1300
|
:variable => 'Y',
|
1195
1301
|
:symbol => symb,
|
1196
1302
|
}
|
1197
|
-
|
1303
|
+
Sequest::PepXML::TerminalModification.new(hash)
|
1198
1304
|
end
|
1199
1305
|
|
1200
1306
|
#########################
|
@@ -1221,7 +1327,7 @@ end
|
|
1221
1327
|
|
1222
1328
|
# Modified aminoacid, static or variable
|
1223
1329
|
# unless otherwise stated, all attributes can be anything
|
1224
|
-
class
|
1330
|
+
class Sequest::PepXML::AAModification
|
1225
1331
|
include SpecIDXML
|
1226
1332
|
|
1227
1333
|
# The amino acid (one letter code)
|
@@ -1256,7 +1362,7 @@ class SpecID::Sequest::PepXML::AAModification
|
|
1256
1362
|
end
|
1257
1363
|
|
1258
1364
|
# Modified aminoacid, static or variable
|
1259
|
-
class
|
1365
|
+
class Sequest::PepXML::TerminalModification
|
1260
1366
|
include SpecIDXML
|
1261
1367
|
|
1262
1368
|
# n for N-terminus, c for C-terminus
|
@@ -1285,7 +1391,7 @@ class SpecID::Sequest::PepXML::TerminalModification
|
|
1285
1391
|
end
|
1286
1392
|
|
1287
1393
|
|
1288
|
-
class
|
1394
|
+
class Sequest::PepXML::SearchDatabase
|
1289
1395
|
include SpecIDXML
|
1290
1396
|
attr_accessor :local_path
|
1291
1397
|
attr_writer :seq_type
|
@@ -1316,7 +1422,7 @@ class SpecID::Sequest::PepXML::SearchDatabase
|
|
1316
1422
|
|
1317
1423
|
end
|
1318
1424
|
|
1319
|
-
class
|
1425
|
+
class Sequest::PepXML::SpectrumQuery
|
1320
1426
|
include SpecIDXML
|
1321
1427
|
|
1322
1428
|
# basename_noext.first_scan.last_scan.charge
|
@@ -1344,7 +1450,7 @@ class SpecID::Sequest::PepXML::SpectrumQuery
|
|
1344
1450
|
# FOR PEPXML:
|
1345
1451
|
############################################################
|
1346
1452
|
def to_pepxml
|
1347
|
-
case
|
1453
|
+
case Sequest::PepXML.pepxml_version
|
1348
1454
|
when 18
|
1349
1455
|
element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
|
1350
1456
|
@search_results.collect { |sr| sr.to_pepxml }.join
|
@@ -1412,102 +1518,35 @@ class SpecID::Sequest::PepXML::SpectrumQuery
|
|
1412
1518
|
|
1413
1519
|
end
|
1414
1520
|
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
|
1419
|
-
#
|
1420
|
-
|
1421
|
-
|
1422
|
-
class SpecID::Sequest::PepXML::SearchHit < Array
|
1521
|
+
|
1522
|
+
|
1523
|
+
Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info) )
|
1524
|
+
|
1525
|
+
# hit_rank=0 peptide=1 peptide_prev_aa=2 peptide_next_aa=3 protein=4 num_tot_proteins=5 num_matched_ions=6 tot_num_ions=7 calc_neutral_pep_mass=8 massdiff=9 num_tol_term=10 num_missed_cleavages=11 is_rejected=12 deltacnstar=13 xcorr=14 deltacn=15 spscore=16 sprank=17 modification_info=18
|
1526
|
+
|
1527
|
+
class Sequest::PepXML::SearchHit
|
1423
1528
|
include SpecIDXML
|
1424
1529
|
|
1425
1530
|
Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
|
1426
1531
|
|
1427
|
-
# num_tot_proteins = "Number of unique proteins in search database containing peptide"
|
1428
|
-
#attr_accessor 0:hit_rank, 1:peptide, 2:peptide_prev_aa, 3:peptide_next_aa, 4:protein, 5:num_tot_proteins, 6:num_matched_ions, 7:tot_num_ions, 8:calc_neutral_pep_mass, 9:massdiff, 10:num_tol_term, 11:num_missed_cleavages, 12:is_rejected
|
1429
|
-
#attr_accessor 13:deltacnstar
|
1430
|
-
#attr_accessor 14:xcorr, 15:deltacn, 16:spscore, 17:sprank
|
1431
|
-
ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
|
1432
|
-
|
1433
|
-
ind_keys = {:hit_rank => 0, :peptide => 1, :peptide_prev_aa => 2, :peptide_next_aa => 3, :protein => 4, :num_tot_proteins => 5, :num_matched_ions => 6, :tot_num_ions => 7, :calc_neutral_pep_mass => 8, :massdiff => 9, :num_tol_term => 10, :num_missed_cleavages => 11, :is_rejected => 12, :deltacnstar => 13, :xcorr => 14, :deltacn => 15, :spscore => 16, :sprank => 17}
|
1434
|
-
@@methods = ind_keys.keys
|
1435
|
-
def hit_rank ; self[0] end ; def hit_rank=(oth) ; self[0] = oth end
|
1436
|
-
def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
|
1437
|
-
def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
|
1438
|
-
def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
|
1439
|
-
def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
|
1440
|
-
def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
|
1441
|
-
def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
|
1442
|
-
def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
|
1443
|
-
def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
|
1444
|
-
def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
|
1445
|
-
def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
|
1446
|
-
def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
|
1447
|
-
def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
|
1448
|
-
def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
|
1449
|
-
def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
|
1450
|
-
def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
|
1451
|
-
def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
|
1452
|
-
def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
|
1453
|
-
|
1454
|
-
@@arr_size = ind_keys.size
|
1455
|
-
ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
|
1456
|
-
ind_keys.merge!(ind_keys_w_eq)
|
1457
|
-
ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
|
1458
1532
|
|
1459
1533
|
# These are all search_score elements:
|
1460
1534
|
|
1461
1535
|
# 1 if there is no second ranked hit, 0 otherwise
|
1462
1536
|
|
1537
|
+
tmp_verb = $VERBOSE
|
1538
|
+
$VERBOSE = nil
|
1463
1539
|
def initialize(hash=nil)
|
1464
1540
|
super(@@arr_size)
|
1465
|
-
|
1466
|
-
|
1467
|
-
#if hash ; set_from_hash(hash) end
|
1468
|
-
end
|
1469
|
-
|
1470
|
-
# remove_non_amino_acids && split_sequence
|
1471
|
-
def self.prepare_sequence(val)
|
1472
|
-
nv = remove_non_amino_acids(val)
|
1473
|
-
split_sequence(nv)
|
1474
|
-
end
|
1475
|
-
|
1476
|
-
# Returns prev, peptide, next from sequence. Parse errors return
|
1477
|
-
# nil,nil,nil
|
1478
|
-
# R.PEPTIDE.A # -> R, PEPTIDE, A
|
1479
|
-
# R.PEPTIDE.- # -> R, PEPTIDE, -
|
1480
|
-
# PEPTIDE.A # -> -, PEPTIDE, A
|
1481
|
-
# A.PEPTIDE # -> A, PEPTIDE, -
|
1482
|
-
# PEPTIDE # -> nil,nil,nil
|
1483
|
-
def self.split_sequence(val)
|
1484
|
-
peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
|
1485
|
-
pieces = val.split('.')
|
1486
|
-
case pieces.size
|
1487
|
-
when 3
|
1488
|
-
peptide_prev_aa, peptide, peptide_next_aa = *pieces
|
1489
|
-
when 2
|
1490
|
-
if pieces[0].size > 1 ## N termini
|
1491
|
-
peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
|
1492
|
-
else ## C termini
|
1493
|
-
peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
|
1494
|
-
end
|
1495
|
-
when 1 ## this must be a parse error!
|
1496
|
-
peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
|
1497
|
-
when 0
|
1498
|
-
peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
|
1541
|
+
if hash
|
1542
|
+
self[0,19] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info]]
|
1499
1543
|
end
|
1500
|
-
|
1501
|
-
end
|
1502
|
-
|
1503
|
-
# removes nonstandard chars with Non_standard_amino_acid_char_re
|
1504
|
-
# preserves A-Z and '.
|
1505
|
-
def self.remove_non_amino_acids(sequence)
|
1506
|
-
sequence.gsub(Non_standard_amino_acid_char_re, '')
|
1544
|
+
self
|
1507
1545
|
end
|
1546
|
+
$VERBOSE = tmp_verb
|
1508
1547
|
|
1509
1548
|
def inspect
|
1510
|
-
var = @@
|
1549
|
+
var = @@attributes.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
|
1511
1550
|
"#<SearchHit #{var}>"
|
1512
1551
|
end
|
1513
1552
|
|
@@ -1515,7 +1554,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
|
|
1515
1554
|
def self.calc_num_missed_cleavages(params, sequence)
|
1516
1555
|
num_missed = 0
|
1517
1556
|
split_after, except_before = params.enzyme_specificity
|
1518
|
-
first, middle, last =
|
1557
|
+
first, middle, last = SpecID::Pep.split_sequence(sequence)
|
1519
1558
|
arr = middle.scan(/[#{split_after}][^#{except_before}]/)
|
1520
1559
|
return arr.size
|
1521
1560
|
end
|
@@ -1524,7 +1563,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
|
|
1524
1563
|
def self.calc_num_tol_term(params, sequence)
|
1525
1564
|
num_tol = 0
|
1526
1565
|
split_after, except_before = params.enzyme_specificity
|
1527
|
-
first, middle, last =
|
1566
|
+
first, middle, last = SpecID::Pep.split_sequence(sequence)
|
1528
1567
|
last_of_middle = middle[-1,1]
|
1529
1568
|
first_of_middle = middle[0,1]
|
1530
1569
|
if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
|
@@ -1552,15 +1591,23 @@ class SpecID::Sequest::PepXML::SearchHit < Array
|
|
1552
1591
|
end
|
1553
1592
|
|
1554
1593
|
def to_pepxml
|
1594
|
+
mod_pepxml =
|
1595
|
+
if self[18]
|
1596
|
+
self[18].to_pepxml
|
1597
|
+
else
|
1598
|
+
''
|
1599
|
+
end
|
1600
|
+
|
1555
1601
|
element_xml("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
|
1556
|
-
|
1602
|
+
mod_pepxml +
|
1603
|
+
search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
|
1557
1604
|
end
|
1558
1605
|
end
|
1559
1606
|
|
1560
1607
|
end
|
1561
1608
|
|
1562
1609
|
# Positions and masses of modifications
|
1563
|
-
class
|
1610
|
+
class Sequest::PepXML::SearchHit::ModificationInfo
|
1564
1611
|
include SpecIDXML
|
1565
1612
|
|
1566
1613
|
## Should be something like this:
|
@@ -1583,7 +1630,11 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
|
|
1583
1630
|
attr_accessor :mod_aminoacid_mass_array
|
1584
1631
|
|
1585
1632
|
def initialize(hash=nil)
|
1586
|
-
|
1633
|
+
@mod_nterm_mass = nil
|
1634
|
+
@mod_cterm_mass = nil
|
1635
|
+
if hash
|
1636
|
+
instance_var_set_from_hash(hash)
|
1637
|
+
end
|
1587
1638
|
end
|
1588
1639
|
|
1589
1640
|
# Will escape any xml special chars in modified_peptide
|
@@ -1621,3 +1672,4 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
|
|
1621
1672
|
|
1622
1673
|
end
|
1623
1674
|
|
1675
|
+
|