mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
7
7
  require 'instance_var_set_from_hash'
8
8
  require 'spec/msrun'
9
9
  require 'spec_id/srf'
10
+ require 'fileutils'
10
11
 
11
12
  class Numeric
12
13
  # returns a string with a + or - on the front
@@ -75,10 +76,10 @@ end
75
76
 
76
77
 
77
78
 
78
- module SpecID::Sequest; end
79
- class SpecID::Sequest::PepXML; end
79
+ module Sequest; end
80
+ class Sequest::PepXML; end
80
81
 
81
- class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
82
+ class Sequest::PepXML::MSMSPipelineAnalysis
82
83
  include SpecIDXML
83
84
  # Version 1.2.3
84
85
  attr_writer :date
@@ -106,7 +107,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
106
107
  def date
107
108
  if @date ; @date
108
109
  else
109
- case SpecID::Sequest::PepXML.pepxml_version
110
+ case Sequest::PepXML.pepxml_version
110
111
  when 18 ; tarr = Time.now.to_a ; tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
111
112
  when 0 ; Time.new.to_s
112
113
  end
@@ -132,7 +133,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
132
133
  end
133
134
 
134
135
  def to_pepxml
135
- case SpecID::Sequest::PepXML.pepxml_version
136
+ case Sequest::PepXML.pepxml_version
136
137
  when 0
137
138
  element_xml(:msms_pipeline_analysis, [:date, :summary_xml]) do
138
139
  @msms_run_summary.to_pepxml
@@ -142,13 +143,13 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
142
143
  @msms_run_summary.to_pepxml
143
144
  end
144
145
  else
145
- abort "Don't know how to deal with version: #{SpecID::Sequest::PepXML.pepxml_version}"
146
+ abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
146
147
  end
147
148
  end
148
149
 
149
150
  end
150
151
 
151
- class SpecID::Sequest::PepXML::MSMSRunSummary
152
+ class Sequest::PepXML::MSMSRunSummary
152
153
  include SpecIDXML
153
154
 
154
155
  # the version of TPP you are using (determines xml output)
@@ -184,7 +185,7 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
184
185
  end
185
186
 
186
187
  def to_pepxml
187
- case SpecID::Sequest::PepXML.pepxml_version
188
+ case Sequest::PepXML.pepxml_version
188
189
  when 18
189
190
  element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
190
191
  sample_enzyme.to_pepxml +
@@ -210,7 +211,7 @@ end
210
211
 
211
212
 
212
213
 
213
- class SpecID::Sequest::PepXML
214
+ class Sequest::PepXML
214
215
  include SpecIDXML
215
216
 
216
217
  ## CREATE a default version for the entire class
@@ -292,21 +293,22 @@ class SpecID::Sequest::PepXML
292
293
  # objects. Ideally, we'd like these attributes to reside elsewhere, but for
293
294
  # memory concerns, this is best for now.
294
295
  def self._prot_num_and_first_prot_by_pep(pep_array)
295
- pep_array.hash_by(:sequence).each do |seq, pep_arr|
296
- prots = pep_arr.collect { |pep| pep.prot }
297
- prots.uniq!
298
- _size = prots.size
296
+ pep_array.hash_by(:aaseq).each do |aasq, pep_arr|
297
+ prts = []
298
+ pep_arr.each { |pep| prts.push( *(pep.prots) ) }
299
+ prts.uniq!
300
+ _size = prts.size
299
301
  pep_arr.each do |pep|
300
302
  pep._num_prots = _size.to_s
301
- pep._first_prot = prots.first
303
+ pep._first_prot = prts.first
302
304
  end
303
305
  end
304
306
  end
305
307
 
306
308
 
307
- Default_Options = {
308
- :out_path => nil,
309
- :backup_db_path => '/project/marcotte/marcotte/ms/database',
309
+ Default_Options = {
310
+ :out_path => '.',
311
+ #:backup_db_path => '.',
310
312
  # a PepXML option
311
313
  :pepxml_version => DEF_VERSION,
312
314
  ## MSMSRunSummary options:
@@ -314,15 +316,18 @@ class SpecID::Sequest::PepXML
314
316
  # or create your own SampleEnzyme object
315
317
  :sample_enzyme => 'trypsin',
316
318
  :ms_manufacturer => 'ThermoFinnigan',
317
- :ms_model => 'LCQ Deca XP',
319
+ :ms_model => 'LCQ Deca XP Plus',
318
320
  :ms_ionization => 'ESI',
319
321
  :ms_mass_analyzer => 'Ion Trap',
320
322
  :ms_detector => 'UNKNOWN',
323
+ :ms_data => '.', # path to ms data files (raw or mzxml)
321
324
  :raw_data_type => "raw",
322
325
  :raw_data => ".mzXML", ## even if you don't have it?
323
326
  ## SearchSummary options:
324
327
  :out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
325
- :out_data => ".tgz" ## may be srf??
328
+ :out_data => ".tgz", ## may be srf??
329
+ :copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
330
+ :print => false, # print the objects to file
326
331
  }
327
332
 
328
333
  # will dynamically set :ms_model and :ms_mass_analyzer from srf info
@@ -330,23 +335,23 @@ class SpecID::Sequest::PepXML
330
335
  # and LCQ Deca XP
331
336
  # See Sequest::PepXML::Default_Options hash for defaults
332
337
  # unless given, the out_path will be given as the path of the srf_file
333
- def self.new_from_srf(srf_file, opts={})
338
+ # srf may be an object or a filename
339
+ def self.new_from_srf(srf, opts={})
334
340
  opts = Default_Options.merge(opts)
335
341
 
336
- ## set the outpath
337
- out_path = opts.delete(:out_path)
338
- unless out_path
339
- out_path = File.dirname(srf_file)
342
+ ## read the srf file
343
+ if srf.is_a? String
344
+ srf = SRF.new(srf)
340
345
  end
341
346
 
342
- ## read the srf file
343
- srf = SRF.new(srf_file)
347
+ ## set the outpath
348
+ out_path = opts.delete(:out_path)
344
349
 
345
350
  params = srf.params
346
351
 
347
352
  ## check to see if we need backup_db
348
353
  backup_db_path = opts.delete(:backup_db_path)
349
- unless File.exist? params.database
354
+ if !File.exist?(params.database) && backup_db_path
350
355
  params.database_path = backup_db_path
351
356
  end
352
357
 
@@ -374,24 +379,47 @@ class SpecID::Sequest::PepXML
374
379
 
375
380
  ## Create the search summary:
376
381
  search_summary_options = {
377
- :search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params),
382
+ :search_database => Sequest::PepXML::SearchDatabase.new(params),
378
383
  :base_name => full_base_name_no_ext,
379
384
  :out_data_type => out_data_type,
380
385
  :out_data => out_data
381
386
  }
382
- opts[:search_summary] = SpecID::Sequest::PepXML::SearchSummary.new( params, search_summary_options)
387
+ modifications_string = srf.header.modifications
388
+ search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
383
389
 
384
390
  ## Create the SampleEnzyme object if necessary
385
391
  unless opts[:sample_enzyme].is_a? SampleEnzyme
386
392
  opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
387
393
  end
388
394
 
389
- ## Create the pepxml obj
390
- pepxml_obj = SpecID::Sequest::PepXML.new(ppxml_version, params)
395
+ ## Create the pepxml obj and top level objects
396
+ pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
397
+ pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
398
+ pepxml_obj.msms_pipeline_analysis = pipeline
399
+ pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
400
+ pipeline.msms_run_summary.search_summary = search_summary
401
+ modifications_obj = search_summary.modifications
402
+
391
403
  ## name some common variables we'll need
392
404
  h_plus = pepxml_obj.h_plus
393
405
  avg_parent = pepxml_obj.avg_parent
394
406
 
407
+
408
+ ## COPY MZXML FILES IF NECESSARY
409
+ if opts[:copy_mzxml]
410
+ mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
411
+ to_copy = Spec::MzXML.file_to_mzxml(mzxml_pathname_noext)
412
+ if to_copy
413
+ FileUtils.cp to_copy, out_path
414
+ else
415
+ puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
416
+ puts "Perhaps you need to specifiy the location of the raw data"
417
+ puts "or need an mzXML converter (readw.exe or t2x)"
418
+ exit
419
+ end
420
+ end
421
+
422
+
395
423
  #######################################################################
396
424
  # CREATE the spectrum_queries_ar
397
425
  #######################################################################
@@ -420,6 +448,8 @@ class SpecID::Sequest::PepXML
420
448
  deltacnstar = '1'
421
449
  end
422
450
 
451
+
452
+
423
453
  ## mass calculations:
424
454
  precursor_neutral_mass = dta_file.mh - h_plus
425
455
  calc_neutral_pep_mass = top_hit[0] - h_plus
@@ -428,6 +458,9 @@ class SpecID::Sequest::PepXML
428
458
  else ; massdiff = massdiff.to_s end
429
459
 
430
460
  (start_scan, end_scan, charge) = srf_index[i]
461
+
462
+
463
+
431
464
  sq_hash = {
432
465
  :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
433
466
  :start_scan => start_scan,
@@ -438,9 +471,13 @@ class SpecID::Sequest::PepXML
438
471
  :index => files_with_hits_index,
439
472
  }
440
473
 
474
+ spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
475
+
476
+ sequence = top_hit[8]
477
+
441
478
  # NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
442
479
  ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
443
- (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_hit[8])
480
+ (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
444
481
  # ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
445
482
 
446
483
  sh_hash = {
@@ -448,14 +485,14 @@ class SpecID::Sequest::PepXML
448
485
  :peptide => pepseq,
449
486
  :peptide_prev_aa => prevaa,
450
487
  :peptide_next_aa => nextaa,
451
- :protein => top_hit[9].split(" ").first,
452
- :num_tot_proteins => top_hit[10],
488
+ :protein => top_hit[9].first.reference.split(" ").first,
489
+ :num_tot_proteins => top_hit[9].size,
453
490
  :num_matched_ions => top_hit[6],
454
491
  :tot_num_ions => top_hit[7],
455
492
  :calc_neutral_pep_mass => calc_neutral_pep_mass,
456
493
  :massdiff => massdiff,
457
- :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_hit[8]),
458
- :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_hit[8]),
494
+ :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
495
+ :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
459
496
  :is_rejected => '0',
460
497
  # These are search score attributes:
461
498
  :xcorr => top_hit[3],
@@ -463,51 +500,88 @@ class SpecID::Sequest::PepXML
463
500
  :deltacnstar => deltacnstar,
464
501
  :spscore => top_hit[2],
465
502
  :sprank => top_hit[5],
503
+ :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
466
504
  }
505
+ search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
467
506
 
468
- spectrum_queries_arr[files_with_hits_index] = SpecID::Sequest::PepXML::SpectrumQuery.new(sq_hash) do
469
- search_result = SpecID::Sequest::PepXML::SearchResult.new do
470
- [ SpecID::Sequest::PepXML::SearchHit.new(sh_hash) ] # there can be multiple hits
471
- end # SearchResult
472
- [search_result] # can be multiple
473
- end
507
+ search_result = Sequest::PepXML::SearchResult.new
508
+ search_result.search_hits = [search_hit]
509
+ spectrum_query.search_results = [search_result]
510
+ spectrum_queries_arr[files_with_hits_index] = spectrum_query
474
511
  end
475
512
  spectrum_queries_arr.compact!
476
513
 
477
- #######################################################################
478
- # ADD the pipeline analysis
479
- #######################################################################
480
-
481
- pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'}) do
482
- SpecID::Sequest::PepXML::MSMSRunSummary.new(opts) { spectrum_queries_arr }
483
- end
484
- pepxml_obj.msms_pipeline_analysis = pipeline
514
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
485
515
  pepxml_obj.base_name = pipeline.msms_run_summary.base_name
516
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
517
+
486
518
  pepxml_obj
487
519
  end
488
520
 
521
+ # takes an .srg or bioworks.xml file
522
+ # if possible, ensures that an mzXML file is present for each pepxml file
523
+ # :print => true, will print files
524
+ def self.set_from_bioworks(bioworks_file, opts={})
525
+ opts = Default_Options.merge(opts)
526
+ ## Create the out_path directory if necessary
527
+
528
+ unless File.exist? opts[:out_path]
529
+ FileUtils.mkpath(opts[:out_path])
530
+ end
531
+ unless File.directory? opts[:out_path]
532
+ abort "#{opts[:out_path]} must be a directory!"
533
+ end
534
+
535
+ spec_id = SpecID.new(bioworks_file)
536
+ pepxml_objs =
537
+ if spec_id.is_a? Bioworks
538
+ abort("must have opts[:params] set!") unless opts[:params]
539
+ set_from_bioworks_xml(bioworks_file, opts[:params], opts)
540
+ elsif spec_id.is_a? SRFGroup
541
+ spec_id.srfs.map do |srf|
542
+ new_from_srf(srf, opts)
543
+ end
544
+ else
545
+ abort "invalid object"
546
+ end
547
+
548
+ if opts[:print]
549
+ pepxml_objs.each do |obj|
550
+ obj.to_pepxml(obj.base_name + ".xml")
551
+ end
552
+ end
553
+ pepxml_objs
554
+ end
555
+
556
+
489
557
  # Takes bioworks 3.2/3.3 xml output (with no filters)
490
558
  # Returns a list of PepXML objects
491
- # msdata = path to mzXML files (or .timeIndex files) (or @TODO: path to sqt file(s))
492
559
  # params = sequest.params file
493
560
  # bioworks = bioworks.xml exported multi-consensus view file
494
561
  # pepxml_version = 0 for tpp 1.2.3
495
562
  # pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
496
- def self.set_from_bioworks(params, bioworks, msdata, out_path, pepxml_version=18, sample_enzyme='trypsin', ms_manufacturer='ThermoFinnigan', ms_model='LCQ Deca XP Plus', ms_ionization='ESI', ms_mass_analyzer='Ion Trap', ms_detector='UNKNOWN', raw_data_type="raw", raw_data=".mzXML", out_data_type="out", out_data=".tgz")
563
+ def self.set_from_bioworks_xml(bioworks, params, opts={})
564
+ opts = Default_Options.merge(opts)
565
+ pepxml_version, sample_enzyme, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)
566
+
567
+ unless out_path
568
+ out_path = '.'
569
+ end
570
+
497
571
  supported_versions = [0,18]
498
572
 
499
- unless supported_versions.include?(pepxml_version)
573
+ unless supported_versions.include?(opts[:pepxml_version])
500
574
  abort "pepxml_version: #{pepxml_version} not currently supported. Current support is for versions #{supported_versions.join(', ')}"
501
575
  end
502
576
 
503
577
  ## Turn params and bioworks_obj into objects if necessary:
504
578
  # Params:
505
- if params.class == SpecID::Sequest::Params # OK!
506
- elsif params.class == String ; params = SpecID::Sequest::Params.new(params)
579
+ if params.class == Sequest::Params # OK!
580
+ elsif params.class == String ; params = Sequest::Params.new(params)
507
581
  else ; abort "Don't recognize #{params} as object or string!"
508
582
  end
509
583
  # Bioworks:
510
- if bioworks.class == SpecID::Bioworks # OK!
584
+ if bioworks.class == Bioworks # OK!
511
585
  elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
512
586
  else ; abort "Don't recognize #{bioworks} as object or string!"
513
587
  end
@@ -516,39 +590,98 @@ class SpecID::Sequest::PepXML
516
590
 
517
591
  ## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
518
592
  ## bioworks
519
- #bioworks.peps.each { |pep| if pep.class != SpecID::Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
593
+ #bioworks.peps.each { |pep| if pep.class != Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
520
594
 
595
+ ## check to see if we need backup_db
596
+
597
+ backup_db_path = opts.delete(:backup_db_path)
598
+ if !File.exist?(params.database) && backup_db_path
599
+ params.database_path = backup_db_path
600
+ end
521
601
 
522
602
  ## Start
523
603
  split_bio_objs = []
524
604
 
525
- ## Create a hash by pep object containing num_tot_proteins
526
- ## This is only valid if all hits are present (no previous thresholding)
527
- self._prot_num_and_first_prot_by_pep(bioworks.peps)
528
605
  ## (num_prots_by_pep, prot_by_pep) =
529
606
  #num_prots_by_pep.each do |k,v| puts "k: #{k} v: #{v}\n"; break end ; prot_by_pep.each do |k,v| puts "k: #{k} v: #{v}" ; break end ; abort "HERE"
530
607
 
608
+ modifications_string = bioworks.modifications
609
+ search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
610
+ modifications_obj = search_summary.modifications
611
+
531
612
  ## Create a hash of spectrum_query arrays by filename (this very big block):
532
613
  spectrum_queries_by_base_name = {}
533
- pepxml_objs_by_base_name = {}
534
614
  # Hash by the filenames to split into filenames:
535
- bioworks.peps.hash_by(:base_name).each do |base_name, pep_arr|
615
+ bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|
536
616
 
617
+ pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
618
+ full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
619
+
620
+ case pepxml_version
621
+ when 18
622
+ pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
623
+ msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
624
+ :base_name => full_base_name_no_ext,
625
+ :ms_manufacturer => ms_manufacturer,
626
+ :ms_model => ms_model,
627
+ :ms_ionization => ms_ionization,
628
+ :ms_mass_analyzer => ms_mass_analyzer,
629
+ :ms_detector => ms_detector,
630
+ :raw_data_type => raw_data_type,
631
+ :raw_data => raw_data,
632
+ :sample_enzyme => SampleEnzyme.new(sample_enzyme),
633
+ :search_summary => search_summary,
634
+ })
635
+ pipeline.msms_run_summary = msms_run_summary
636
+ pepxml_obj.msms_pipeline_analysis = pipeline
637
+ pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name = full_base_name_no_ext
638
+ pepxml_obj.base_name = full_base_name_no_ext
639
+ pepxml_obj
640
+ when 0
641
+ ## @TODO: NEED TO REVAMP THIS:
642
+ # Sequest::PepXML.new(pepxml_version).set_from_hash({
643
+ # :params => params,
644
+ # :search_results => spectrum_queries_arr,
645
+ # :base_name => self.make_base_name( File.expand_path(out_path), base_name),
646
+ # :search_engine => params.search_engine,
647
+ # :database => params.database,
648
+ # :raw_data_type => "mzXML",
649
+ # :raw_data => ".mzXML",
650
+ # :out_data_type => "out",
651
+ # :out_data => ".tgz",
652
+ # :sample_enzyme => params.enzyme,
653
+ # })
654
+ end
655
+
656
+
657
+
658
+
659
+
660
+ # Create a hash by pep object containing num_tot_proteins
661
+ # This is only valid if all hits are present (no previous thresholding)
662
+ # Since out2summary only acts on one folder at a time,
663
+ # we should only do it for one folder at a time! (that's why we do this
664
+ # here instead of globally)
665
+ self._prot_num_and_first_prot_by_pep(pep_arr)
537
666
  prec_mz_arr = nil
538
667
  case x = bioworks.version
539
668
  when /3.2/
540
669
  calc_prec_by = :prec_mz_arr
541
670
  # get the precursor_mz array for this filename
542
- inner__full_base_name_no_ext = File.join(msdata, base_name)
543
- prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(inner__full_base_name_no_ext)
671
+ prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(File.join(ms_data, base_name))
544
672
  when /3.3/
545
673
  calc_prec_by = :deltamass
546
674
  else
547
675
  abort "invalid BioworksBrowser version: #{x}"
548
676
  end
549
677
 
550
- pepxml_obj = SpecID::Sequest::PepXML.new(pepxml_version, params)
551
- pepxml_objs_by_base_name[base_name] = pepxml_obj
678
+ if opts[:copy_mzxml]
679
+ to_copy = Spec::MzXML.file_to_mzxml(File.join(ms_data, base_name))
680
+ if to_copy
681
+ FileUtils.cp to_copy, out_path
682
+ end
683
+ end
684
+
552
685
 
553
686
  spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
554
687
 
@@ -561,9 +694,9 @@ class SpecID::Sequest::PepXML
561
694
 
562
695
  case calc_prec_by
563
696
  when :prec_mz_arr
564
- precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
697
+ precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
565
698
  when :deltamass
566
- precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
699
+ precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
567
700
  end
568
701
 
569
702
  calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
@@ -581,98 +714,58 @@ class SpecID::Sequest::PepXML
581
714
  end
582
715
  # Create the nested structure of queries{results{hits}}
583
716
  # (Ruby's blocks work beautifully for things like this)
584
- spec_query = SpecID::Sequest::PepXML::SpectrumQuery.new({
717
+ spec_query = Sequest::PepXML::SpectrumQuery.new({
585
718
  :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
586
719
  :start_scan => top_pep.first_scan,
587
720
  :end_scan => top_pep.last_scan,
588
721
  :precursor_neutral_mass => precursor_neutral_mass.to_s,
589
722
  :assumed_charge => top_pep.charge,
590
723
  :pepxml_version => pepxml_version,
591
- }) do
592
- search_result = SpecID::Sequest::PepXML::SearchResult.new do
593
-
594
- ## Calculate some interdependent values;
595
- # NOTE: the bioworks mass is really M+H if two or more scans went
596
- # into the search_hit; calc_neutral_pep_mass is simply the avg of
597
- # precursor masses adjusted to be neutral
598
- (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_pep.sequence)
599
- (num_matched_ions, tot_num_ions) = SpecID::Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
600
- search_hit = SpecID::Sequest::PepXML::SearchHit.new({
601
- :hit_rank => "1",
602
- :peptide => pepseq,
603
- :peptide_prev_aa => prevaa,
604
- :peptide_next_aa => nextaa,
605
- :protein => top_pep._first_prot.reference.split(" ").first,
606
- :num_tot_proteins => top_pep._num_prots,
607
- :num_matched_ions => num_matched_ions,
608
- :tot_num_ions => tot_num_ions,
609
- :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
610
- :massdiff => massdiff,
611
- :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
612
- :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
613
- :is_rejected => "0",
614
- # These are search score attributes:
615
- :xcorr => top_pep.xcorr,
616
- :deltacn => top_pep.deltacn,
617
- :deltacnstar => deltacnstar,
618
- :spscore => top_pep.sp,
619
- :sprank => top_pep.rsp,
620
- })
621
- [search_hit] # there can be multiple search hits
622
- end # SearchResult
623
- [search_result] # can be multiple search_results
624
- end # SpectrumQuery
625
- end # Collects the spectrum queries
724
+ })
725
+
726
+
727
+ search_result = Sequest::PepXML::SearchResult.new
728
+
729
+ ## Calculate some interdependent values;
730
+ # NOTE: the bioworks mass is really M+H if two or more scans went
731
+ # into the search_hit; calc_neutral_pep_mass is simply the avg of
732
+ # precursor masses adjusted to be neutral
733
+ (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
734
+ (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
735
+ search_hit = Sequest::PepXML::SearchHit.new({
736
+ :hit_rank => "1",
737
+ :peptide => pepseq,
738
+ :peptide_prev_aa => prevaa,
739
+ :peptide_next_aa => nextaa,
740
+ :protein => top_pep._first_prot.reference.split(" ").first,
741
+ :num_tot_proteins => top_pep._num_prots,
742
+ :num_matched_ions => num_matched_ions,
743
+ :tot_num_ions => tot_num_ions,
744
+ :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
745
+ :massdiff => massdiff,
746
+ :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
747
+ :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
748
+ :is_rejected => "0",
749
+ # These are search score attributes:
750
+ :xcorr => top_pep.xcorr,
751
+ :deltacn => top_pep.deltacn,
752
+ :deltacnstar => deltacnstar,
753
+ :spscore => top_pep.sp,
754
+ :sprank => top_pep.rsp,
755
+ :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
756
+ })
757
+ search_result.search_hits = [search_hit] # there can be multiple search hits
758
+ spec_query.search_results = [search_result] # can be multiple search_results
759
+ spec_query
760
+ end
626
761
 
627
762
  # create an index by spectrum as results end up typically in out2summary
628
763
  # (I really dislike this order, however)
629
764
  spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
630
765
  spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
631
-
632
- spectrum_queries_by_base_name[base_name] = spectrum_queries_ar
633
- end
634
-
635
- modifications_string = bioworks.modifications
636
-
637
- spectrum_queries_by_base_name.collect do |base_name, spectrum_queries_ar|
638
- case pepxml_version
639
- when 18
640
- pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
641
- full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
642
- SpecID::Sequest::PepXML::MSMSRunSummary.new({
643
- :base_name => full_base_name_no_ext,
644
- :ms_manufacturer => ms_manufacturer,
645
- :ms_model => ms_model,
646
- :ms_ionization => ms_ionization,
647
- :ms_mass_analyzer => ms_mass_analyzer,
648
- :ms_detector => ms_detector,
649
- :raw_data_type => raw_data_type,
650
- :raw_data => raw_data,
651
- :sample_enzyme => SampleEnzyme.new(sample_enzyme),
652
- :search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
653
- }) { spectrum_queries_ar }
654
- end
655
- pepxml_obj = pepxml_objs_by_base_name[base_name]
656
- pepxml_obj.msms_pipeline_analysis = pipeline
657
- pepxml_obj.base_name = pipeline.msms_run_summary.base_name
658
- pepxml_obj
659
- when 0
660
- ## @TODO: NEED TO REVAMP THIS:
661
- # SpecID::Sequest::PepXML.new(pepxml_version).set_from_hash({
662
- # :params => params,
663
- # :search_results => spectrum_queries_arr,
664
- # :base_name => self.make_base_name( File.expand_path(out_path), base_name),
665
- # :search_engine => params.search_engine,
666
- # :database => params.database,
667
- # :raw_data_type => "mzXML",
668
- # :raw_data => ".mzXML",
669
- # :out_data_type => "out",
670
- # :out_data => ".tgz",
671
- # :sample_enzyme => params.enzyme,
672
- # })
673
- end
674
- end # collects the pepxml objects
675
-
766
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
767
+ pepxml_obj
768
+ end ## collects pepxml_objs
676
769
  end
677
770
 
678
771
  def summary_xml
@@ -724,7 +817,7 @@ end # PepXML
724
817
  ##
725
818
  # In the future, this guy should accept any version of bioworks params file
726
819
  # and spit out any param queried.
727
- class SpecID::Sequest::Params
820
+ class Sequest::Params
728
821
  include SpecIDXML
729
822
 
730
823
  # current attributes supported are:
@@ -941,7 +1034,7 @@ class SpecID::Sequest::Params
941
1034
 
942
1035
  end
943
1036
 
944
- class SpecID::Sequest::PepXML::SearchResult
1037
+ class Sequest::PepXML::SearchResult
945
1038
  include SpecIDXML
946
1039
  # an array of search_hits
947
1040
  attr_accessor :search_hits
@@ -959,7 +1052,7 @@ class SpecID::Sequest::PepXML::SearchResult
959
1052
  end
960
1053
  end
961
1054
 
962
- class SpecID::Sequest::PepXML::SearchSummary
1055
+ class Sequest::PepXML::SearchSummary
963
1056
  include SpecIDXML
964
1057
  attr_accessor :params
965
1058
  attr_accessor :base_name
@@ -974,7 +1067,7 @@ class SpecID::Sequest::PepXML::SearchSummary
974
1067
  def initialize(params, modifications_string='', args=nil)
975
1068
  @search_id = nil
976
1069
  @params = params
977
- @modifications = SpecID::Sequest::PepXML::Modifications.new(params, modifications_string)
1070
+ @modifications = Sequest::PepXML::Modifications.new(params, modifications_string)
978
1071
  if args ; set_from_hash(args) end
979
1072
  end
980
1073
 
@@ -999,7 +1092,7 @@ class SpecID::Sequest::PepXML::SearchSummary
999
1092
 
1000
1093
  end
1001
1094
 
1002
- class SpecID::Sequest::PepXML::Modifications
1095
+ class Sequest::PepXML::Modifications
1003
1096
  include SpecIDXML
1004
1097
 
1005
1098
  # sequest params object
@@ -1032,20 +1125,27 @@ class SpecID::Sequest::PepXML::Modifications
1032
1125
 
1033
1126
  # set the masses_by_diff_mod and mod_symbols_hash from
1034
1127
  def set_hashes(modification_symbols_string)
1128
+
1035
1129
  @mod_symbols_hash = {}
1036
1130
  @masses_by_diff_mod = {}
1037
- if modification_symbols_string == nil || modification_symbols_string == ''
1131
+ if (modification_symbols_string == nil || modification_symbols_string == '')
1038
1132
  return nil
1039
1133
  end
1040
1134
  table = @params.mass_table
1041
1135
  modification_symbols_string.split(/\)\s+\(/).each do |mod|
1042
- if mod =~ /\(?(\w{1,2})(.) (.[\d\.]+)\)?/
1043
- aa_as_sym = $1.to_sym,
1044
- @mod_symbols_hash[[aa_as_sym, $3.to_f]] = $2.dup
1136
+ if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
1045
1137
  if $1 == 'ct' || $1 == 'nt'
1046
- @masses_by_diff_mod[$2] = $3.to_f
1138
+ mass_diff = $3.to_f
1139
+ @masses_by_diff_mod[$2] = mass_diff
1140
+ @mod_symbols_hash[[$1, mass_diff]] = $2.dup
1047
1141
  else
1048
- @masses_by_diff_mod[$1+$2] = $3.to_f + table[aa_as_sym]
1142
+ symbol_string = $2.dup
1143
+ mass_diff = $3.to_f
1144
+ $1.split('').each do |aa|
1145
+ aa_as_sym = aa.to_sym
1146
+ @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
1147
+ @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
1148
+ end
1049
1149
  end
1050
1150
  end
1051
1151
  end
@@ -1058,8 +1158,8 @@ class SpecID::Sequest::PepXML::Modifications
1058
1158
  if @masses_by_diff_mod.size == 0
1059
1159
  return nil
1060
1160
  end
1061
- hash[:modified_peptide] = peptide.dup
1062
1161
  hash = {}
1162
+ hash[:modified_peptide] = peptide.dup
1063
1163
  hsh = @masses_by_diff_mod
1064
1164
  table = @params.mass_table
1065
1165
  h = table[:h] # this? or h_plus ??
@@ -1068,12 +1168,13 @@ class SpecID::Sequest::PepXML::Modifications
1068
1168
  if hsh.key? peptide[0,1]
1069
1169
  # AA + H + differential_mod
1070
1170
  hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
1071
- peptide.slice!( 1..-1 )
1171
+ peptide = peptide[1...(peptide.size)]
1072
1172
  end
1073
- if hsh.key? peptide[-1,1]
1173
+ if hsh.key? peptide[(peptide.size-1),1]
1074
1174
  # AA + OH + differential_mod
1075
- hash[:mod_cterm_mass] = table[peptide[-2,1].to_sym] + oh + hsh[peptide[-1,1]]
1175
+ hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
1076
1176
  peptide.slice!( 0..-2 )
1177
+ peptide = peptide[0...(peptide.size-1)]
1077
1178
  end
1078
1179
  mod_array = []
1079
1180
  (0...peptide.size).each do |i|
@@ -1084,8 +1185,8 @@ class SpecID::Sequest::PepXML::Modifications
1084
1185
  if mod_array.size > 0
1085
1186
  hash[:mod_aminoacid_mass_array] = mod_array
1086
1187
  end
1087
- if hash.size > 0
1088
- SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
1188
+ if hash.size > 1 # if there is more than just the modified peptide there
1189
+ Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
1089
1190
  else
1090
1191
  nil
1091
1192
  end
@@ -1127,7 +1228,7 @@ class SpecID::Sequest::PepXML::Modifications
1127
1228
  :variable => 'N',
1128
1229
  :binary => 'Y',
1129
1230
  }
1130
- SpecID::Sequest::PepXML::AAModification.new(hash)
1231
+ Sequest::PepXML::AAModification.new(hash)
1131
1232
  end
1132
1233
 
1133
1234
  ## Create the static_terminal_mods objects
@@ -1149,7 +1250,7 @@ class SpecID::Sequest::PepXML::Modifications
1149
1250
  :description => mod[0],
1150
1251
  }
1151
1252
  hash[:protein_terminus] = protein_terminus if protein_terminus
1152
- SpecID::Sequest::PepXML::TerminalModification.new(hash)
1253
+ Sequest::PepXML::TerminalModification.new(hash)
1153
1254
  end
1154
1255
  #################################
1155
1256
  # Variable Mods:
@@ -1159,20 +1260,25 @@ class SpecID::Sequest::PepXML::Modifications
1159
1260
  variable_mods = []
1160
1261
  (0...arr.size).step(2) do |i|
1161
1262
  if arr[i].to_f != 0.0
1162
- variable_mods << [arr[i+1].to_sym, arr[i].to_f]
1263
+ variable_mods << [arr[i+1], arr[i].to_f]
1163
1264
  end
1164
1265
  end
1165
- variable_mods.map! do |mod|
1166
- hash = {
1167
- :aminoacid => mod[0].to_s,
1168
- :massdiff => mod[1].to_plus_minus_string,
1169
- :mass => aa_hash[mod[0]] + mod[1],
1170
- :variable => 'Y',
1171
- :binary => 'N',
1172
- :symbol => @mod_symbols_hash[mod],
1173
- }
1174
- SpecID::Sequest::PepXML::AAModification.new(hash)
1266
+ mod_objects = []
1267
+ variable_mods.each do |mod|
1268
+ mod[0].split('').each do |aa|
1269
+ hash = {
1270
+
1271
+ :aminoacid => aa,
1272
+ :massdiff => mod[1].to_plus_minus_string,
1273
+ :mass => aa_hash[aa.to_sym] + mod[1],
1274
+ :variable => 'Y',
1275
+ :binary => 'N',
1276
+ :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
1277
+ }
1278
+ mod_objects << Sequest::PepXML::AAModification.new(hash)
1279
+ end
1175
1280
  end
1281
+ variable_mods = mod_objects
1176
1282
  #################################
1177
1283
  # TERMINAL Variable Mods:
1178
1284
  #################################
@@ -1194,7 +1300,7 @@ class SpecID::Sequest::PepXML::Modifications
1194
1300
  :variable => 'Y',
1195
1301
  :symbol => symb,
1196
1302
  }
1197
- SpecID::Sequest::PepXML::TerminalModification.new(hash)
1303
+ Sequest::PepXML::TerminalModification.new(hash)
1198
1304
  end
1199
1305
 
1200
1306
  #########################
@@ -1221,7 +1327,7 @@ end
1221
1327
 
1222
1328
  # Modified aminoacid, static or variable
1223
1329
  # unless otherwise stated, all attributes can be anything
1224
- class SpecID::Sequest::PepXML::AAModification
1330
+ class Sequest::PepXML::AAModification
1225
1331
  include SpecIDXML
1226
1332
 
1227
1333
  # The amino acid (one letter code)
@@ -1256,7 +1362,7 @@ class SpecID::Sequest::PepXML::AAModification
1256
1362
  end
1257
1363
 
1258
1364
  # Modified aminoacid, static or variable
1259
- class SpecID::Sequest::PepXML::TerminalModification
1365
+ class Sequest::PepXML::TerminalModification
1260
1366
  include SpecIDXML
1261
1367
 
1262
1368
  # n for N-terminus, c for C-terminus
@@ -1285,7 +1391,7 @@ class SpecID::Sequest::PepXML::TerminalModification
1285
1391
  end
1286
1392
 
1287
1393
 
1288
- class SpecID::Sequest::PepXML::SearchDatabase
1394
+ class Sequest::PepXML::SearchDatabase
1289
1395
  include SpecIDXML
1290
1396
  attr_accessor :local_path
1291
1397
  attr_writer :seq_type
@@ -1316,7 +1422,7 @@ class SpecID::Sequest::PepXML::SearchDatabase
1316
1422
 
1317
1423
  end
1318
1424
 
1319
- class SpecID::Sequest::PepXML::SpectrumQuery
1425
+ class Sequest::PepXML::SpectrumQuery
1320
1426
  include SpecIDXML
1321
1427
 
1322
1428
  # basename_noext.first_scan.last_scan.charge
@@ -1344,7 +1450,7 @@ class SpecID::Sequest::PepXML::SpectrumQuery
1344
1450
  # FOR PEPXML:
1345
1451
  ############################################################
1346
1452
  def to_pepxml
1347
- case SpecID::Sequest::PepXML.pepxml_version
1453
+ case Sequest::PepXML.pepxml_version
1348
1454
  when 18
1349
1455
  element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
1350
1456
  @search_results.collect { |sr| sr.to_pepxml }.join
@@ -1412,102 +1518,35 @@ class SpecID::Sequest::PepXML::SpectrumQuery
1412
1518
 
1413
1519
  end
1414
1520
 
1415
- # This object inherits from Array. As such, it is very memory efficient
1416
- # (compared to a normal object). However, certain operations when used on
1417
- # these objects will produce undesirable results: An array of these objects
1418
- # will be flattened (becoming a long list of attributes) when 'flatten' is
1419
- # called on them, which is not the behavior we want! other odd behavior is
1420
- # possible. Possible fixes are to use a delegate class or redefine the way
1421
- # this responds to flatten (so that it won't flatten).
1422
- class SpecID::Sequest::PepXML::SearchHit < Array
1521
+
1522
+
1523
+ Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info) )
1524
+
1525
+ # hit_rank=0 peptide=1 peptide_prev_aa=2 peptide_next_aa=3 protein=4 num_tot_proteins=5 num_matched_ions=6 tot_num_ions=7 calc_neutral_pep_mass=8 massdiff=9 num_tol_term=10 num_missed_cleavages=11 is_rejected=12 deltacnstar=13 xcorr=14 deltacn=15 spscore=16 sprank=17 modification_info=18
1526
+
1527
+ class Sequest::PepXML::SearchHit
1423
1528
  include SpecIDXML
1424
1529
 
1425
1530
  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
1426
1531
 
1427
- # num_tot_proteins = "Number of unique proteins in search database containing peptide"
1428
- #attr_accessor 0:hit_rank, 1:peptide, 2:peptide_prev_aa, 3:peptide_next_aa, 4:protein, 5:num_tot_proteins, 6:num_matched_ions, 7:tot_num_ions, 8:calc_neutral_pep_mass, 9:massdiff, 10:num_tol_term, 11:num_missed_cleavages, 12:is_rejected
1429
- #attr_accessor 13:deltacnstar
1430
- #attr_accessor 14:xcorr, 15:deltacn, 16:spscore, 17:sprank
1431
- ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
1432
-
1433
- ind_keys = {:hit_rank => 0, :peptide => 1, :peptide_prev_aa => 2, :peptide_next_aa => 3, :protein => 4, :num_tot_proteins => 5, :num_matched_ions => 6, :tot_num_ions => 7, :calc_neutral_pep_mass => 8, :massdiff => 9, :num_tol_term => 10, :num_missed_cleavages => 11, :is_rejected => 12, :deltacnstar => 13, :xcorr => 14, :deltacn => 15, :spscore => 16, :sprank => 17}
1434
- @@methods = ind_keys.keys
1435
- def hit_rank ; self[0] end ; def hit_rank=(oth) ; self[0] = oth end
1436
- def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
1437
- def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
1438
- def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
1439
- def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
1440
- def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
1441
- def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
1442
- def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
1443
- def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
1444
- def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
1445
- def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
1446
- def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
1447
- def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
1448
- def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
1449
- def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
1450
- def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
1451
- def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
1452
- def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
1453
-
1454
- @@arr_size = ind_keys.size
1455
- ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
1456
- ind_keys.merge!(ind_keys_w_eq)
1457
- ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
1458
1532
 
1459
1533
  # These are all search_score elements:
1460
1534
 
1461
1535
  # 1 if there is no second ranked hit, 0 otherwise
1462
1536
 
1537
+ tmp_verb = $VERBOSE
1538
+ $VERBOSE = nil
1463
1539
  def initialize(hash=nil)
1464
1540
  super(@@arr_size)
1465
- self[0,18] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank]]
1466
- self
1467
- #if hash ; set_from_hash(hash) end
1468
- end
1469
-
1470
- # remove_non_amino_acids && split_sequence
1471
- def self.prepare_sequence(val)
1472
- nv = remove_non_amino_acids(val)
1473
- split_sequence(nv)
1474
- end
1475
-
1476
- # Returns prev, peptide, next from sequence. Parse errors return
1477
- # nil,nil,nil
1478
- # R.PEPTIDE.A # -> R, PEPTIDE, A
1479
- # R.PEPTIDE.- # -> R, PEPTIDE, -
1480
- # PEPTIDE.A # -> -, PEPTIDE, A
1481
- # A.PEPTIDE # -> A, PEPTIDE, -
1482
- # PEPTIDE # -> nil,nil,nil
1483
- def self.split_sequence(val)
1484
- peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
1485
- pieces = val.split('.')
1486
- case pieces.size
1487
- when 3
1488
- peptide_prev_aa, peptide, peptide_next_aa = *pieces
1489
- when 2
1490
- if pieces[0].size > 1 ## N termini
1491
- peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
1492
- else ## C termini
1493
- peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
1494
- end
1495
- when 1 ## this must be a parse error!
1496
- peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
1497
- when 0
1498
- peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
1541
+ if hash
1542
+ self[0,19] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info]]
1499
1543
  end
1500
- return peptide_prev_aa, peptide, peptide_next_aa
1501
- end
1502
-
1503
- # removes nonstandard chars with Non_standard_amino_acid_char_re
1504
- # preserves A-Z and '.
1505
- def self.remove_non_amino_acids(sequence)
1506
- sequence.gsub(Non_standard_amino_acid_char_re, '')
1544
+ self
1507
1545
  end
1546
+ $VERBOSE = tmp_verb
1508
1547
 
1509
1548
  def inspect
1510
- var = @@methods.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
1549
+ var = @@attributes.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
1511
1550
  "#<SearchHit #{var}>"
1512
1551
  end
1513
1552
 
@@ -1515,7 +1554,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1515
1554
  def self.calc_num_missed_cleavages(params, sequence)
1516
1555
  num_missed = 0
1517
1556
  split_after, except_before = params.enzyme_specificity
1518
- first, middle, last = self.split_sequence(sequence)
1557
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
1519
1558
  arr = middle.scan(/[#{split_after}][^#{except_before}]/)
1520
1559
  return arr.size
1521
1560
  end
@@ -1524,7 +1563,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1524
1563
  def self.calc_num_tol_term(params, sequence)
1525
1564
  num_tol = 0
1526
1565
  split_after, except_before = params.enzyme_specificity
1527
- first, middle, last = self.split_sequence(sequence)
1566
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
1528
1567
  last_of_middle = middle[-1,1]
1529
1568
  first_of_middle = middle[0,1]
1530
1569
  if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
@@ -1552,15 +1591,23 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1552
1591
  end
1553
1592
 
1554
1593
  def to_pepxml
1594
+ mod_pepxml =
1595
+ if self[18]
1596
+ self[18].to_pepxml
1597
+ else
1598
+ ''
1599
+ end
1600
+
1555
1601
  element_xml("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
1556
- search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
1602
+ mod_pepxml +
1603
+ search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
1557
1604
  end
1558
1605
  end
1559
1606
 
1560
1607
  end
1561
1608
 
1562
1609
  # Positions and masses of modifications
1563
- class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1610
+ class Sequest::PepXML::SearchHit::ModificationInfo
1564
1611
  include SpecIDXML
1565
1612
 
1566
1613
  ## Should be something like this:
@@ -1583,7 +1630,11 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1583
1630
  attr_accessor :mod_aminoacid_mass_array
1584
1631
 
1585
1632
  def initialize(hash=nil)
1586
- instance_var_set_from_hash(hash)
1633
+ @mod_nterm_mass = nil
1634
+ @mod_cterm_mass = nil
1635
+ if hash
1636
+ instance_var_set_from_hash(hash)
1637
+ end
1587
1638
  end
1588
1639
 
1589
1640
  # Will escape any xml special chars in modified_peptide
@@ -1621,3 +1672,4 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1621
1672
 
1622
1673
  end
1623
1674
 
1675
+