mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
7
7
  require 'instance_var_set_from_hash'
8
8
  require 'spec/msrun'
9
9
  require 'spec_id/srf'
10
+ require 'fileutils'
10
11
 
11
12
  class Numeric
12
13
  # returns a string with a + or - on the front
@@ -75,10 +76,10 @@ end
75
76
 
76
77
 
77
78
 
78
- module SpecID::Sequest; end
79
- class SpecID::Sequest::PepXML; end
79
+ module Sequest; end
80
+ class Sequest::PepXML; end
80
81
 
81
- class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
82
+ class Sequest::PepXML::MSMSPipelineAnalysis
82
83
  include SpecIDXML
83
84
  # Version 1.2.3
84
85
  attr_writer :date
@@ -106,7 +107,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
106
107
  def date
107
108
  if @date ; @date
108
109
  else
109
- case SpecID::Sequest::PepXML.pepxml_version
110
+ case Sequest::PepXML.pepxml_version
110
111
  when 18 ; tarr = Time.now.to_a ; tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
111
112
  when 0 ; Time.new.to_s
112
113
  end
@@ -132,7 +133,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
132
133
  end
133
134
 
134
135
  def to_pepxml
135
- case SpecID::Sequest::PepXML.pepxml_version
136
+ case Sequest::PepXML.pepxml_version
136
137
  when 0
137
138
  element_xml(:msms_pipeline_analysis, [:date, :summary_xml]) do
138
139
  @msms_run_summary.to_pepxml
@@ -142,13 +143,13 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
142
143
  @msms_run_summary.to_pepxml
143
144
  end
144
145
  else
145
- abort "Don't know how to deal with version: #{SpecID::Sequest::PepXML.pepxml_version}"
146
+ abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
146
147
  end
147
148
  end
148
149
 
149
150
  end
150
151
 
151
- class SpecID::Sequest::PepXML::MSMSRunSummary
152
+ class Sequest::PepXML::MSMSRunSummary
152
153
  include SpecIDXML
153
154
 
154
155
  # the version of TPP you are using (determines xml output)
@@ -184,7 +185,7 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
184
185
  end
185
186
 
186
187
  def to_pepxml
187
- case SpecID::Sequest::PepXML.pepxml_version
188
+ case Sequest::PepXML.pepxml_version
188
189
  when 18
189
190
  element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
190
191
  sample_enzyme.to_pepxml +
@@ -210,7 +211,7 @@ end
210
211
 
211
212
 
212
213
 
213
- class SpecID::Sequest::PepXML
214
+ class Sequest::PepXML
214
215
  include SpecIDXML
215
216
 
216
217
  ## CREATE a default version for the entire class
@@ -292,21 +293,22 @@ class SpecID::Sequest::PepXML
292
293
  # objects. Ideally, we'd like these attributes to reside elsewhere, but for
293
294
  # memory concerns, this is best for now.
294
295
  def self._prot_num_and_first_prot_by_pep(pep_array)
295
- pep_array.hash_by(:sequence).each do |seq, pep_arr|
296
- prots = pep_arr.collect { |pep| pep.prot }
297
- prots.uniq!
298
- _size = prots.size
296
+ pep_array.hash_by(:aaseq).each do |aasq, pep_arr|
297
+ prts = []
298
+ pep_arr.each { |pep| prts.push( *(pep.prots) ) }
299
+ prts.uniq!
300
+ _size = prts.size
299
301
  pep_arr.each do |pep|
300
302
  pep._num_prots = _size.to_s
301
- pep._first_prot = prots.first
303
+ pep._first_prot = prts.first
302
304
  end
303
305
  end
304
306
  end
305
307
 
306
308
 
307
- Default_Options = {
308
- :out_path => nil,
309
- :backup_db_path => '/project/marcotte/marcotte/ms/database',
309
+ Default_Options = {
310
+ :out_path => '.',
311
+ #:backup_db_path => '.',
310
312
  # a PepXML option
311
313
  :pepxml_version => DEF_VERSION,
312
314
  ## MSMSRunSummary options:
@@ -314,15 +316,18 @@ class SpecID::Sequest::PepXML
314
316
  # or create your own SampleEnzyme object
315
317
  :sample_enzyme => 'trypsin',
316
318
  :ms_manufacturer => 'ThermoFinnigan',
317
- :ms_model => 'LCQ Deca XP',
319
+ :ms_model => 'LCQ Deca XP Plus',
318
320
  :ms_ionization => 'ESI',
319
321
  :ms_mass_analyzer => 'Ion Trap',
320
322
  :ms_detector => 'UNKNOWN',
323
+ :ms_data => '.', # path to ms data files (raw or mzxml)
321
324
  :raw_data_type => "raw",
322
325
  :raw_data => ".mzXML", ## even if you don't have it?
323
326
  ## SearchSummary options:
324
327
  :out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
325
- :out_data => ".tgz" ## may be srf??
328
+ :out_data => ".tgz", ## may be srf??
329
+ :copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
330
+ :print => false, # print the objects to file
326
331
  }
327
332
 
328
333
  # will dynamically set :ms_model and :ms_mass_analyzer from srf info
@@ -330,23 +335,23 @@ class SpecID::Sequest::PepXML
330
335
  # and LCQ Deca XP
331
336
  # See SRF::Sequest::PepXML::Default_Options hash for defaults
332
337
  # unless given, the out_path will be given as the path of the srf_file
333
- def self.new_from_srf(srf_file, opts={})
338
+ # srf may be an object or a filename
339
+ def self.new_from_srf(srf, opts={})
334
340
  opts = Default_Options.merge(opts)
335
341
 
336
- ## set the outpath
337
- out_path = opts.delete(:out_path)
338
- unless out_path
339
- out_path = File.dirname(srf_file)
342
+ ## read the srf file
343
+ if srf.is_a? String
344
+ srf = SRF.new(srf)
340
345
  end
341
346
 
342
- ## read the srf file
343
- srf = SRF.new(srf_file)
347
+ ## set the outpath
348
+ out_path = opts.delete(:out_path)
344
349
 
345
350
  params = srf.params
346
351
 
347
352
  ## check to see if we need backup_db
348
353
  backup_db_path = opts.delete(:backup_db_path)
349
- unless File.exist? params.database
354
+ if !File.exist?(params.database) && backup_db_path
350
355
  params.database_path = backup_db_path
351
356
  end
352
357
 
@@ -374,24 +379,47 @@ class SpecID::Sequest::PepXML
374
379
 
375
380
  ## Create the search summary:
376
381
  search_summary_options = {
377
- :search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params),
382
+ :search_database => Sequest::PepXML::SearchDatabase.new(params),
378
383
  :base_name => full_base_name_no_ext,
379
384
  :out_data_type => out_data_type,
380
385
  :out_data => out_data
381
386
  }
382
- opts[:search_summary] = SpecID::Sequest::PepXML::SearchSummary.new( params, search_summary_options)
387
+ modifications_string = srf.header.modifications
388
+ search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
383
389
 
384
390
  ## Create the SampleEnzyme object if necessary
385
391
  unless opts[:sample_enzyme].is_a? SampleEnzyme
386
392
  opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
387
393
  end
388
394
 
389
- ## Create the pepxml obj
390
- pepxml_obj = SpecID::Sequest::PepXML.new(ppxml_version, params)
395
+ ## Create the pepxml obj and top level objects
396
+ pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
397
+ pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
398
+ pepxml_obj.msms_pipeline_analysis = pipeline
399
+ pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
400
+ pipeline.msms_run_summary.search_summary = search_summary
401
+ modifications_obj = search_summary.modifications
402
+
391
403
  ## name some common variables we'll need
392
404
  h_plus = pepxml_obj.h_plus
393
405
  avg_parent = pepxml_obj.avg_parent
394
406
 
407
+
408
+ ## COPY MZXML FILES IF NECESSARY
409
+ if opts[:copy_mzxml]
410
+ mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
411
+ to_copy = Spec::MzXML.file_to_mzxml(mzxml_pathname_noext)
412
+ if to_copy
413
+ FileUtils.cp to_copy, out_path
414
+ else
415
+ puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
416
+ puts "Perhaps you need to specifiy the location of the raw data"
417
+ puts "or need an mzXML converter (readw.exe or t2x)"
418
+ exit
419
+ end
420
+ end
421
+
422
+
395
423
  #######################################################################
396
424
  # CREATE the spectrum_queries_ar
397
425
  #######################################################################
@@ -420,6 +448,8 @@ class SpecID::Sequest::PepXML
420
448
  deltacnstar = '1'
421
449
  end
422
450
 
451
+
452
+
423
453
  ## mass calculations:
424
454
  precursor_neutral_mass = dta_file.mh - h_plus
425
455
  calc_neutral_pep_mass = top_hit[0] - h_plus
@@ -428,6 +458,9 @@ class SpecID::Sequest::PepXML
428
458
  else ; massdiff = massdiff.to_s end
429
459
 
430
460
  (start_scan, end_scan, charge) = srf_index[i]
461
+
462
+
463
+
431
464
  sq_hash = {
432
465
  :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
433
466
  :start_scan => start_scan,
@@ -438,9 +471,13 @@ class SpecID::Sequest::PepXML
438
471
  :index => files_with_hits_index,
439
472
  }
440
473
 
474
+ spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
475
+
476
+ sequence = top_hit[8]
477
+
441
478
  # NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
442
479
  ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
443
- (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_hit[8])
480
+ (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
444
481
  # ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
445
482
 
446
483
  sh_hash = {
@@ -448,14 +485,14 @@ class SpecID::Sequest::PepXML
448
485
  :peptide => pepseq,
449
486
  :peptide_prev_aa => prevaa,
450
487
  :peptide_next_aa => nextaa,
451
- :protein => top_hit[9].split(" ").first,
452
- :num_tot_proteins => top_hit[10],
488
+ :protein => top_hit[9].first.reference.split(" ").first,
489
+ :num_tot_proteins => top_hit[9].size,
453
490
  :num_matched_ions => top_hit[6],
454
491
  :tot_num_ions => top_hit[7],
455
492
  :calc_neutral_pep_mass => calc_neutral_pep_mass,
456
493
  :massdiff => massdiff,
457
- :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_hit[8]),
458
- :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_hit[8]),
494
+ :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
495
+ :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
459
496
  :is_rejected => '0',
460
497
  # These are search score attributes:
461
498
  :xcorr => top_hit[3],
@@ -463,51 +500,88 @@ class SpecID::Sequest::PepXML
463
500
  :deltacnstar => deltacnstar,
464
501
  :spscore => top_hit[2],
465
502
  :sprank => top_hit[5],
503
+ :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
466
504
  }
505
+ search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
467
506
 
468
- spectrum_queries_arr[files_with_hits_index] = SpecID::Sequest::PepXML::SpectrumQuery.new(sq_hash) do
469
- search_result = SpecID::Sequest::PepXML::SearchResult.new do
470
- [ SpecID::Sequest::PepXML::SearchHit.new(sh_hash) ] # there can be multiple hits
471
- end # SearchResult
472
- [search_result] # can be multiple
473
- end
507
+ search_result = Sequest::PepXML::SearchResult.new
508
+ search_result.search_hits = [search_hit]
509
+ spectrum_query.search_results = [search_result]
510
+ spectrum_queries_arr[files_with_hits_index] = spectrum_query
474
511
  end
475
512
  spectrum_queries_arr.compact!
476
513
 
477
- #######################################################################
478
- # ADD the pipeline analysis
479
- #######################################################################
480
-
481
- pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'}) do
482
- SpecID::Sequest::PepXML::MSMSRunSummary.new(opts) { spectrum_queries_arr }
483
- end
484
- pepxml_obj.msms_pipeline_analysis = pipeline
514
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
485
515
  pepxml_obj.base_name = pipeline.msms_run_summary.base_name
516
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
517
+
486
518
  pepxml_obj
487
519
  end
488
520
 
521
+ # takes an .srg or bioworks.xml file
522
+ # if possible, ensures that an mzXML file is present for each pepxml file
523
+ # :print => true, will print files
524
+ def self.set_from_bioworks(bioworks_file, opts={})
525
+ opts = Default_Options.merge(opts)
526
+ ## Create the out_path directory if necessary
527
+
528
+ unless File.exist? opts[:out_path]
529
+ FileUtils.mkpath(opts[:out_path])
530
+ end
531
+ unless File.directory? opts[:out_path]
532
+ abort "#{opts[:out_path]} must be a directory!"
533
+ end
534
+
535
+ spec_id = SpecID.new(bioworks_file)
536
+ pepxml_objs =
537
+ if spec_id.is_a? Bioworks
538
+ abort("must have opts[:params] set!") unless opts[:params]
539
+ set_from_bioworks_xml(bioworks_file, opts[:params], opts)
540
+ elsif spec_id.is_a? SRFGroup
541
+ spec_id.srfs.map do |srf|
542
+ new_from_srf(srf, opts)
543
+ end
544
+ else
545
+ abort "invalid object"
546
+ end
547
+
548
+ if opts[:print]
549
+ pepxml_objs.each do |obj|
550
+ obj.to_pepxml(obj.base_name + ".xml")
551
+ end
552
+ end
553
+ pepxml_objs
554
+ end
555
+
556
+
489
557
  # Takes bioworks 3.2/3.3 xml output (with no filters)
490
558
  # Returns a list of PepXML objects
491
- # msdata = path to mzXML files (or .timeIndex files) (or @TODO: path to sqt file(s))
492
559
  # params = sequest.params file
493
560
  # bioworks = bioworks.xml exported multi-consensus view file
494
561
  # pepxml_version = 0 for tpp 1.2.3
495
562
  # pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
496
- def self.set_from_bioworks(params, bioworks, msdata, out_path, pepxml_version=18, sample_enzyme='trypsin', ms_manufacturer='ThermoFinnigan', ms_model='LCQ Deca XP Plus', ms_ionization='ESI', ms_mass_analyzer='Ion Trap', ms_detector='UNKNOWN', raw_data_type="raw", raw_data=".mzXML", out_data_type="out", out_data=".tgz")
563
+ def self.set_from_bioworks_xml(bioworks, params, opts={})
564
+ opts = Default_Options.merge(opts)
565
+ pepxml_version, sample_enzyme, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)
566
+
567
+ unless out_path
568
+ out_path = '.'
569
+ end
570
+
497
571
  supported_versions = [0,18]
498
572
 
499
- unless supported_versions.include?(pepxml_version)
573
+ unless supported_versions.include?(opts[:pepxml_version])
500
574
  abort "pepxml_version: #{pepxml_version} not currently supported. Current support is for versions #{supported_versions.join(', ')}"
501
575
  end
502
576
 
503
577
  ## Turn params and bioworks_obj into objects if necessary:
504
578
  # Params:
505
- if params.class == SpecID::Sequest::Params # OK!
506
- elsif params.class == String ; params = SpecID::Sequest::Params.new(params)
579
+ if params.class == Sequest::Params # OK!
580
+ elsif params.class == String ; params = Sequest::Params.new(params)
507
581
  else ; abort "Don't recognize #{params} as object or string!"
508
582
  end
509
583
  # Bioworks:
510
- if bioworks.class == SpecID::Bioworks # OK!
584
+ if bioworks.class == Bioworks # OK!
511
585
  elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
512
586
  else ; abort "Don't recognize #{bioworks} as object or string!"
513
587
  end
@@ -516,39 +590,98 @@ class SpecID::Sequest::PepXML
516
590
 
517
591
  ## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
518
592
  ## bioworks
519
- #bioworks.peps.each { |pep| if pep.class != SpecID::Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
593
+ #bioworks.peps.each { |pep| if pep.class != Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
520
594
 
595
+ ## check to see if we need backup_db
596
+
597
+ backup_db_path = opts.delete(:backup_db_path)
598
+ if !File.exist?(params.database) && backup_db_path
599
+ params.database_path = backup_db_path
600
+ end
521
601
 
522
602
  ## Start
523
603
  split_bio_objs = []
524
604
 
525
- ## Create a hash by pep object containing num_tot_proteins
526
- ## This is only valid if all hits are present (no previous thresholding)
527
- self._prot_num_and_first_prot_by_pep(bioworks.peps)
528
605
  ## (num_prots_by_pep, prot_by_pep) =
529
606
  #num_prots_by_pep.each do |k,v| puts "k: #{k} v: #{v}\n"; break end ; prot_by_pep.each do |k,v| puts "k: #{k} v: #{v}" ; break end ; abort "HERE"
530
607
 
608
+ modifications_string = bioworks.modifications
609
+ search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
610
+ modifications_obj = search_summary.modifications
611
+
531
612
  ## Create a hash of spectrum_query arrays by filename (this very big block):
532
613
  spectrum_queries_by_base_name = {}
533
- pepxml_objs_by_base_name = {}
534
614
  # Hash by the filenames to split into filenames:
535
- bioworks.peps.hash_by(:base_name).each do |base_name, pep_arr|
615
+ bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|
536
616
 
617
+ pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
618
+ full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
619
+
620
+ case pepxml_version
621
+ when 18
622
+ pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
623
+ msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
624
+ :base_name => full_base_name_no_ext,
625
+ :ms_manufacturer => ms_manufacturer,
626
+ :ms_model => ms_model,
627
+ :ms_ionization => ms_ionization,
628
+ :ms_mass_analyzer => ms_mass_analyzer,
629
+ :ms_detector => ms_detector,
630
+ :raw_data_type => raw_data_type,
631
+ :raw_data => raw_data,
632
+ :sample_enzyme => SampleEnzyme.new(sample_enzyme),
633
+ :search_summary => search_summary,
634
+ })
635
+ pipeline.msms_run_summary = msms_run_summary
636
+ pepxml_obj.msms_pipeline_analysis = pipeline
637
+ pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name = full_base_name_no_ext
638
+ pepxml_obj.base_name = full_base_name_no_ext
639
+ pepxml_obj
640
+ when 0
641
+ ## @TODO: NEED TO REVAMP THIS:
642
+ # Sequest::PepXML.new(pepxml_version).set_from_hash({
643
+ # :params => params,
644
+ # :search_results => spectrum_queries_arr,
645
+ # :base_name => self.make_base_name( File.expand_path(out_path), base_name),
646
+ # :search_engine => params.search_engine,
647
+ # :database => params.database,
648
+ # :raw_data_type => "mzXML",
649
+ # :raw_data => ".mzXML",
650
+ # :out_data_type => "out",
651
+ # :out_data => ".tgz",
652
+ # :sample_enzyme => params.enzyme,
653
+ # })
654
+ end
655
+
656
+
657
+
658
+
659
+
660
+ # Create a hash by pep object containing num_tot_proteins
661
+ # This is only valid if all hits are present (no previous thresholding)
662
+ # Since out2summary only acts on one folder at a time,
663
+ # we should only do it for one folder at a time! (that's why we do this
664
+ # here instead of globally)
665
+ self._prot_num_and_first_prot_by_pep(pep_arr)
537
666
  prec_mz_arr = nil
538
667
  case x = bioworks.version
539
668
  when /3.2/
540
669
  calc_prec_by = :prec_mz_arr
541
670
  # get the precursor_mz array for this filename
542
- inner__full_base_name_no_ext = File.join(msdata, base_name)
543
- prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(inner__full_base_name_no_ext)
671
+ prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(File.join(ms_data, base_name))
544
672
  when /3.3/
545
673
  calc_prec_by = :deltamass
546
674
  else
547
675
  abort "invalid BioworksBrowser version: #{x}"
548
676
  end
549
677
 
550
- pepxml_obj = SpecID::Sequest::PepXML.new(pepxml_version, params)
551
- pepxml_objs_by_base_name[base_name] = pepxml_obj
678
+ if opts[:copy_mzxml]
679
+ to_copy = Spec::MzXML.file_to_mzxml(File.join(ms_data, base_name))
680
+ if to_copy
681
+ FileUtils.cp to_copy, out_path
682
+ end
683
+ end
684
+
552
685
 
553
686
  spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
554
687
 
@@ -561,9 +694,9 @@ class SpecID::Sequest::PepXML
561
694
 
562
695
  case calc_prec_by
563
696
  when :prec_mz_arr
564
- precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
697
+ precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
565
698
  when :deltamass
566
- precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
699
+ precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
567
700
  end
568
701
 
569
702
  calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
@@ -581,98 +714,58 @@ class SpecID::Sequest::PepXML
581
714
  end
582
715
  # Create the nested structure of queries{results{hits}}
583
716
  # (Ruby's blocks work beautifully for things like this)
584
- spec_query = SpecID::Sequest::PepXML::SpectrumQuery.new({
717
+ spec_query = Sequest::PepXML::SpectrumQuery.new({
585
718
  :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
586
719
  :start_scan => top_pep.first_scan,
587
720
  :end_scan => top_pep.last_scan,
588
721
  :precursor_neutral_mass => precursor_neutral_mass.to_s,
589
722
  :assumed_charge => top_pep.charge,
590
723
  :pepxml_version => pepxml_version,
591
- }) do
592
- search_result = SpecID::Sequest::PepXML::SearchResult.new do
593
-
594
- ## Calculate some interdependent values;
595
- # NOTE: the bioworks mass is really M+H if two or more scans went
596
- # into the search_hit; calc_neutral_pep_mass is simply the avg of
597
- # precursor masses adjusted to be neutral
598
- (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_pep.sequence)
599
- (num_matched_ions, tot_num_ions) = SpecID::Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
600
- search_hit = SpecID::Sequest::PepXML::SearchHit.new({
601
- :hit_rank => "1",
602
- :peptide => pepseq,
603
- :peptide_prev_aa => prevaa,
604
- :peptide_next_aa => nextaa,
605
- :protein => top_pep._first_prot.reference.split(" ").first,
606
- :num_tot_proteins => top_pep._num_prots,
607
- :num_matched_ions => num_matched_ions,
608
- :tot_num_ions => tot_num_ions,
609
- :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
610
- :massdiff => massdiff,
611
- :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
612
- :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
613
- :is_rejected => "0",
614
- # These are search score attributes:
615
- :xcorr => top_pep.xcorr,
616
- :deltacn => top_pep.deltacn,
617
- :deltacnstar => deltacnstar,
618
- :spscore => top_pep.sp,
619
- :sprank => top_pep.rsp,
620
- })
621
- [search_hit] # there can be multiple search hits
622
- end # SearchResult
623
- [search_result] # can be multiple search_results
624
- end # SpectrumQuery
625
- end # Collects the spectrum queries
724
+ })
725
+
726
+
727
+ search_result = Sequest::PepXML::SearchResult.new
728
+
729
+ ## Calculate some interdependent values;
730
+ # NOTE: the bioworks mass is really M+H if two or more scans went
731
+ # into the search_hit; calc_neutral_pep_mass is simply the avg of
732
+ # precursor masses adjusted to be neutral
733
+ (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
734
+ (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
735
+ search_hit = Sequest::PepXML::SearchHit.new({
736
+ :hit_rank => "1",
737
+ :peptide => pepseq,
738
+ :peptide_prev_aa => prevaa,
739
+ :peptide_next_aa => nextaa,
740
+ :protein => top_pep._first_prot.reference.split(" ").first,
741
+ :num_tot_proteins => top_pep._num_prots,
742
+ :num_matched_ions => num_matched_ions,
743
+ :tot_num_ions => tot_num_ions,
744
+ :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
745
+ :massdiff => massdiff,
746
+ :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
747
+ :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
748
+ :is_rejected => "0",
749
+ # These are search score attributes:
750
+ :xcorr => top_pep.xcorr,
751
+ :deltacn => top_pep.deltacn,
752
+ :deltacnstar => deltacnstar,
753
+ :spscore => top_pep.sp,
754
+ :sprank => top_pep.rsp,
755
+ :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
756
+ })
757
+ search_result.search_hits = [search_hit] # there can be multiple search hits
758
+ spec_query.search_results = [search_result] # can be multiple search_results
759
+ spec_query
760
+ end
626
761
 
627
762
  # create an index by spectrum as results end up typically in out2summary
628
763
  # (I really dislike this order, however)
629
764
  spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
630
765
  spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
631
-
632
- spectrum_queries_by_base_name[base_name] = spectrum_queries_ar
633
- end
634
-
635
- modifications_string = bioworks.modifications
636
-
637
- spectrum_queries_by_base_name.collect do |base_name, spectrum_queries_ar|
638
- case pepxml_version
639
- when 18
640
- pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
641
- full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
642
- SpecID::Sequest::PepXML::MSMSRunSummary.new({
643
- :base_name => full_base_name_no_ext,
644
- :ms_manufacturer => ms_manufacturer,
645
- :ms_model => ms_model,
646
- :ms_ionization => ms_ionization,
647
- :ms_mass_analyzer => ms_mass_analyzer,
648
- :ms_detector => ms_detector,
649
- :raw_data_type => raw_data_type,
650
- :raw_data => raw_data,
651
- :sample_enzyme => SampleEnzyme.new(sample_enzyme),
652
- :search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
653
- }) { spectrum_queries_ar }
654
- end
655
- pepxml_obj = pepxml_objs_by_base_name[base_name]
656
- pepxml_obj.msms_pipeline_analysis = pipeline
657
- pepxml_obj.base_name = pipeline.msms_run_summary.base_name
658
- pepxml_obj
659
- when 0
660
- ## @TODO: NEED TO REVAMP THIS:
661
- # SpecID::Sequest::PepXML.new(pepxml_version).set_from_hash({
662
- # :params => params,
663
- # :search_results => spectrum_queries_arr,
664
- # :base_name => self.make_base_name( File.expand_path(out_path), base_name),
665
- # :search_engine => params.search_engine,
666
- # :database => params.database,
667
- # :raw_data_type => "mzXML",
668
- # :raw_data => ".mzXML",
669
- # :out_data_type => "out",
670
- # :out_data => ".tgz",
671
- # :sample_enzyme => params.enzyme,
672
- # })
673
- end
674
- end # collects the pepxml objects
675
-
766
+ pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
767
+ pepxml_obj
768
+ end ## collects pepxml_objs
676
769
  end
677
770
 
678
771
  def summary_xml
@@ -724,7 +817,7 @@ end # PepXML
724
817
  ##
725
818
  # In the future, this guy should accept any version of bioworks params file
726
819
  # and spit out any param queried.
727
- class SpecID::Sequest::Params
820
+ class Sequest::Params
728
821
  include SpecIDXML
729
822
 
730
823
  # current attributes supported are:
@@ -941,7 +1034,7 @@ class SpecID::Sequest::Params
941
1034
 
942
1035
  end
943
1036
 
944
- class SpecID::Sequest::PepXML::SearchResult
1037
+ class Sequest::PepXML::SearchResult
945
1038
  include SpecIDXML
946
1039
  # an array of search_hits
947
1040
  attr_accessor :search_hits
@@ -959,7 +1052,7 @@ class SpecID::Sequest::PepXML::SearchResult
959
1052
  end
960
1053
  end
961
1054
 
962
- class SpecID::Sequest::PepXML::SearchSummary
1055
+ class Sequest::PepXML::SearchSummary
963
1056
  include SpecIDXML
964
1057
  attr_accessor :params
965
1058
  attr_accessor :base_name
@@ -974,7 +1067,7 @@ class SpecID::Sequest::PepXML::SearchSummary
974
1067
  def initialize(params, modifications_string='', args=nil)
975
1068
  @search_id = nil
976
1069
  @params = params
977
- @modifications = SpecID::Sequest::PepXML::Modifications.new(params, modifications_string)
1070
+ @modifications = Sequest::PepXML::Modifications.new(params, modifications_string)
978
1071
  if args ; set_from_hash(args) end
979
1072
  end
980
1073
 
@@ -999,7 +1092,7 @@ class SpecID::Sequest::PepXML::SearchSummary
999
1092
 
1000
1093
  end
1001
1094
 
1002
- class SpecID::Sequest::PepXML::Modifications
1095
+ class Sequest::PepXML::Modifications
1003
1096
  include SpecIDXML
1004
1097
 
1005
1098
  # sequest params object
@@ -1032,20 +1125,27 @@ class SpecID::Sequest::PepXML::Modifications
1032
1125
 
1033
1126
  # set the masses_by_diff_mod and mod_symbols_hash from
1034
1127
  def set_hashes(modification_symbols_string)
1128
+
1035
1129
  @mod_symbols_hash = {}
1036
1130
  @masses_by_diff_mod = {}
1037
- if modification_symbols_string == nil || modification_symbols_string == ''
1131
+ if (modification_symbols_string == nil || modification_symbols_string == '')
1038
1132
  return nil
1039
1133
  end
1040
1134
  table = @params.mass_table
1041
1135
  modification_symbols_string.split(/\)\s+\(/).each do |mod|
1042
- if mod =~ /\(?(\w{1,2})(.) (.[\d\.]+)\)?/
1043
- aa_as_sym = $1.to_sym,
1044
- @mod_symbols_hash[[aa_as_sym, $3.to_f]] = $2.dup
1136
+ if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
1045
1137
  if $1 == 'ct' || $1 == 'nt'
1046
- @masses_by_diff_mod[$2] = $3.to_f
1138
+ mass_diff = $3.to_f
1139
+ @masses_by_diff_mod[$2] = mass_diff
1140
+ @mod_symbols_hash[[$1, mass_diff]] = $2.dup
1047
1141
  else
1048
- @masses_by_diff_mod[$1+$2] = $3.to_f + table[aa_as_sym]
1142
+ symbol_string = $2.dup
1143
+ mass_diff = $3.to_f
1144
+ $1.split('').each do |aa|
1145
+ aa_as_sym = aa.to_sym
1146
+ @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
1147
+ @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
1148
+ end
1049
1149
  end
1050
1150
  end
1051
1151
  end
@@ -1058,8 +1158,8 @@ class SpecID::Sequest::PepXML::Modifications
1058
1158
  if @masses_by_diff_mod.size == 0
1059
1159
  return nil
1060
1160
  end
1061
- hash[:modified_peptide] = peptide.dup
1062
1161
  hash = {}
1162
+ hash[:modified_peptide] = peptide.dup
1063
1163
  hsh = @masses_by_diff_mod
1064
1164
  table = @params.mass_table
1065
1165
  h = table[:h] # this? or h_plus ??
@@ -1068,12 +1168,13 @@ class SpecID::Sequest::PepXML::Modifications
1068
1168
  if hsh.key? peptide[0,1]
1069
1169
  # AA + H + differential_mod
1070
1170
  hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
1071
- peptide.slice!( 1..-1 )
1171
+ peptide = peptide[1...(peptide.size)]
1072
1172
  end
1073
- if hsh.key? peptide[-1,1]
1173
+ if hsh.key? peptide[(peptide.size-1),1]
1074
1174
  # AA + OH + differential_mod
1075
- hash[:mod_cterm_mass] = table[peptide[-2,1].to_sym] + oh + hsh[peptide[-1,1]]
1175
+ hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
1076
1176
  peptide.slice!( 0..-2 )
1177
+ peptide = peptide[0...(peptide.size-1)]
1077
1178
  end
1078
1179
  mod_array = []
1079
1180
  (0...peptide.size).each do |i|
@@ -1084,8 +1185,8 @@ class SpecID::Sequest::PepXML::Modifications
1084
1185
  if mod_array.size > 0
1085
1186
  hash[:mod_aminoacid_mass_array] = mod_array
1086
1187
  end
1087
- if hash.size > 0
1088
- SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
1188
+ if hash.size > 1 # if there is more than just the modified peptide there
1189
+ Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
1089
1190
  else
1090
1191
  nil
1091
1192
  end
@@ -1127,7 +1228,7 @@ class SpecID::Sequest::PepXML::Modifications
1127
1228
  :variable => 'N',
1128
1229
  :binary => 'Y',
1129
1230
  }
1130
- SpecID::Sequest::PepXML::AAModification.new(hash)
1231
+ Sequest::PepXML::AAModification.new(hash)
1131
1232
  end
1132
1233
 
1133
1234
  ## Create the static_terminal_mods objects
@@ -1149,7 +1250,7 @@ class SpecID::Sequest::PepXML::Modifications
1149
1250
  :description => mod[0],
1150
1251
  }
1151
1252
  hash[:protein_terminus] = protein_terminus if protein_terminus
1152
- SpecID::Sequest::PepXML::TerminalModification.new(hash)
1253
+ Sequest::PepXML::TerminalModification.new(hash)
1153
1254
  end
1154
1255
  #################################
1155
1256
  # Variable Mods:
@@ -1159,20 +1260,25 @@ class SpecID::Sequest::PepXML::Modifications
1159
1260
  variable_mods = []
1160
1261
  (0...arr.size).step(2) do |i|
1161
1262
  if arr[i].to_f != 0.0
1162
- variable_mods << [arr[i+1].to_sym, arr[i].to_f]
1263
+ variable_mods << [arr[i+1], arr[i].to_f]
1163
1264
  end
1164
1265
  end
1165
- variable_mods.map! do |mod|
1166
- hash = {
1167
- :aminoacid => mod[0].to_s,
1168
- :massdiff => mod[1].to_plus_minus_string,
1169
- :mass => aa_hash[mod[0]] + mod[1],
1170
- :variable => 'Y',
1171
- :binary => 'N',
1172
- :symbol => @mod_symbols_hash[mod],
1173
- }
1174
- SpecID::Sequest::PepXML::AAModification.new(hash)
1266
+ mod_objects = []
1267
+ variable_mods.each do |mod|
1268
+ mod[0].split('').each do |aa|
1269
+ hash = {
1270
+
1271
+ :aminoacid => aa,
1272
+ :massdiff => mod[1].to_plus_minus_string,
1273
+ :mass => aa_hash[aa.to_sym] + mod[1],
1274
+ :variable => 'Y',
1275
+ :binary => 'N',
1276
+ :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
1277
+ }
1278
+ mod_objects << Sequest::PepXML::AAModification.new(hash)
1279
+ end
1175
1280
  end
1281
+ variable_mods = mod_objects
1176
1282
  #################################
1177
1283
  # TERMINAL Variable Mods:
1178
1284
  #################################
@@ -1194,7 +1300,7 @@ class SpecID::Sequest::PepXML::Modifications
1194
1300
  :variable => 'Y',
1195
1301
  :symbol => symb,
1196
1302
  }
1197
- SpecID::Sequest::PepXML::TerminalModification.new(hash)
1303
+ Sequest::PepXML::TerminalModification.new(hash)
1198
1304
  end
1199
1305
 
1200
1306
  #########################
@@ -1221,7 +1327,7 @@ end
1221
1327
 
1222
1328
  # Modified aminoacid, static or variable
1223
1329
  # unless otherwise stated, all attributes can be anything
1224
- class SpecID::Sequest::PepXML::AAModification
1330
+ class Sequest::PepXML::AAModification
1225
1331
  include SpecIDXML
1226
1332
 
1227
1333
  # The amino acid (one letter code)
@@ -1256,7 +1362,7 @@ class SpecID::Sequest::PepXML::AAModification
1256
1362
  end
1257
1363
 
1258
1364
  # Modified aminoacid, static or variable
1259
- class SpecID::Sequest::PepXML::TerminalModification
1365
+ class Sequest::PepXML::TerminalModification
1260
1366
  include SpecIDXML
1261
1367
 
1262
1368
  # n for N-terminus, c for C-terminus
@@ -1285,7 +1391,7 @@ class SpecID::Sequest::PepXML::TerminalModification
1285
1391
  end
1286
1392
 
1287
1393
 
1288
- class SpecID::Sequest::PepXML::SearchDatabase
1394
+ class Sequest::PepXML::SearchDatabase
1289
1395
  include SpecIDXML
1290
1396
  attr_accessor :local_path
1291
1397
  attr_writer :seq_type
@@ -1316,7 +1422,7 @@ class SpecID::Sequest::PepXML::SearchDatabase
1316
1422
 
1317
1423
  end
1318
1424
 
1319
- class SpecID::Sequest::PepXML::SpectrumQuery
1425
+ class Sequest::PepXML::SpectrumQuery
1320
1426
  include SpecIDXML
1321
1427
 
1322
1428
  # basename_noext.first_scan.last_scan.charge
@@ -1344,7 +1450,7 @@ class SpecID::Sequest::PepXML::SpectrumQuery
1344
1450
  # FOR PEPXML:
1345
1451
  ############################################################
1346
1452
  def to_pepxml
1347
- case SpecID::Sequest::PepXML.pepxml_version
1453
+ case Sequest::PepXML.pepxml_version
1348
1454
  when 18
1349
1455
  element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
1350
1456
  @search_results.collect { |sr| sr.to_pepxml }.join
@@ -1412,102 +1518,35 @@ class SpecID::Sequest::PepXML::SpectrumQuery
1412
1518
 
1413
1519
  end
1414
1520
 
1415
- # This object inherits from Array. As such, it is very memory efficient
1416
- # (compared to a normal object). However, certain operations when used on
1417
- # these objects will produce undesirable results: An array of these objects
1418
- # will be flattened (becoming a long list of attributes) when 'flatten' is
1419
- # called on them, which is not the behavior we want! other odd behavior is
1420
- # possible. Possible fixes are to use a delegate class or redefine the way
1421
- # this responds to flatten (so that it won't flatten).
1422
- class SpecID::Sequest::PepXML::SearchHit < Array
1521
+
1522
+
1523
+ Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info) )
1524
+
1525
+ # hit_rank=0 peptide=1 peptide_prev_aa=2 peptide_next_aa=3 protein=4 num_tot_proteins=5 num_matched_ions=6 tot_num_ions=7 calc_neutral_pep_mass=8 massdiff=9 num_tol_term=10 num_missed_cleavages=11 is_rejected=12 deltacnstar=13 xcorr=14 deltacn=15 spscore=16 sprank=17 modification_info=18
1526
+
1527
+ class Sequest::PepXML::SearchHit
1423
1528
  include SpecIDXML
1424
1529
 
1425
1530
  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
1426
1531
 
1427
- # num_tot_proteins = "Number of unique proteins in search database containing peptide"
1428
- #attr_accessor 0:hit_rank, 1:peptide, 2:peptide_prev_aa, 3:peptide_next_aa, 4:protein, 5:num_tot_proteins, 6:num_matched_ions, 7:tot_num_ions, 8:calc_neutral_pep_mass, 9:massdiff, 10:num_tol_term, 11:num_missed_cleavages, 12:is_rejected
1429
- #attr_accessor 13:deltacnstar
1430
- #attr_accessor 14:xcorr, 15:deltacn, 16:spscore, 17:sprank
1431
- ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
1432
-
1433
- ind_keys = {:hit_rank => 0, :peptide => 1, :peptide_prev_aa => 2, :peptide_next_aa => 3, :protein => 4, :num_tot_proteins => 5, :num_matched_ions => 6, :tot_num_ions => 7, :calc_neutral_pep_mass => 8, :massdiff => 9, :num_tol_term => 10, :num_missed_cleavages => 11, :is_rejected => 12, :deltacnstar => 13, :xcorr => 14, :deltacn => 15, :spscore => 16, :sprank => 17}
1434
- @@methods = ind_keys.keys
1435
- def hit_rank ; self[0] end ; def hit_rank=(oth) ; self[0] = oth end
1436
- def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
1437
- def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
1438
- def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
1439
- def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
1440
- def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
1441
- def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
1442
- def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
1443
- def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
1444
- def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
1445
- def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
1446
- def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
1447
- def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
1448
- def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
1449
- def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
1450
- def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
1451
- def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
1452
- def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
1453
-
1454
- @@arr_size = ind_keys.size
1455
- ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
1456
- ind_keys.merge!(ind_keys_w_eq)
1457
- ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
1458
1532
 
1459
1533
  # These are all search_score elements:
1460
1534
 
1461
1535
  # 1 if there is no second ranked hit, 0 otherwise
1462
1536
 
1537
+ tmp_verb = $VERBOSE
1538
+ $VERBOSE = nil
1463
1539
  def initialize(hash=nil)
1464
1540
  super(@@arr_size)
1465
- self[0,18] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank]]
1466
- self
1467
- #if hash ; set_from_hash(hash) end
1468
- end
1469
-
1470
- # remove_non_amino_acids && split_sequence
1471
- def self.prepare_sequence(val)
1472
- nv = remove_non_amino_acids(val)
1473
- split_sequence(nv)
1474
- end
1475
-
1476
- # Returns prev, peptide, next from sequence. Parse errors return
1477
- # nil,nil,nil
1478
- # R.PEPTIDE.A # -> R, PEPTIDE, A
1479
- # R.PEPTIDE.- # -> R, PEPTIDE, -
1480
- # PEPTIDE.A # -> -, PEPTIDE, A
1481
- # A.PEPTIDE # -> A, PEPTIDE, -
1482
- # PEPTIDE # -> nil,nil,nil
1483
- def self.split_sequence(val)
1484
- peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
1485
- pieces = val.split('.')
1486
- case pieces.size
1487
- when 3
1488
- peptide_prev_aa, peptide, peptide_next_aa = *pieces
1489
- when 2
1490
- if pieces[0].size > 1 ## N termini
1491
- peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
1492
- else ## C termini
1493
- peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
1494
- end
1495
- when 1 ## this must be a parse error!
1496
- peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
1497
- when 0
1498
- peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
1541
+ if hash
1542
+ self[0,19] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info]]
1499
1543
  end
1500
- return peptide_prev_aa, peptide, peptide_next_aa
1501
- end
1502
-
1503
- # removes nonstandard chars with Non_standard_amino_acid_char_re
1504
- # preserves A-Z and '.
1505
- def self.remove_non_amino_acids(sequence)
1506
- sequence.gsub(Non_standard_amino_acid_char_re, '')
1544
+ self
1507
1545
  end
1546
+ $VERBOSE = tmp_verb
1508
1547
 
1509
1548
  def inspect
1510
- var = @@methods.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
1549
+ var = @@attributes.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
1511
1550
  "#<SearchHit #{var}>"
1512
1551
  end
1513
1552
 
@@ -1515,7 +1554,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1515
1554
  def self.calc_num_missed_cleavages(params, sequence)
1516
1555
  num_missed = 0
1517
1556
  split_after, except_before = params.enzyme_specificity
1518
- first, middle, last = self.split_sequence(sequence)
1557
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
1519
1558
  arr = middle.scan(/[#{split_after}][^#{except_before}]/)
1520
1559
  return arr.size
1521
1560
  end
@@ -1524,7 +1563,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1524
1563
  def self.calc_num_tol_term(params, sequence)
1525
1564
  num_tol = 0
1526
1565
  split_after, except_before = params.enzyme_specificity
1527
- first, middle, last = self.split_sequence(sequence)
1566
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
1528
1567
  last_of_middle = middle[-1,1]
1529
1568
  first_of_middle = middle[0,1]
1530
1569
  if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
@@ -1552,15 +1591,23 @@ class SpecID::Sequest::PepXML::SearchHit < Array
1552
1591
  end
1553
1592
 
1554
1593
  def to_pepxml
1594
+ mod_pepxml =
1595
+ if self[18]
1596
+ self[18].to_pepxml
1597
+ else
1598
+ ''
1599
+ end
1600
+
1555
1601
  element_xml("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
1556
- search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
1602
+ mod_pepxml +
1603
+ search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
1557
1604
  end
1558
1605
  end
1559
1606
 
1560
1607
  end
1561
1608
 
1562
1609
  # Positions and masses of modifications
1563
- class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1610
+ class Sequest::PepXML::SearchHit::ModificationInfo
1564
1611
  include SpecIDXML
1565
1612
 
1566
1613
  ## Should be something like this:
@@ -1583,7 +1630,11 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1583
1630
  attr_accessor :mod_aminoacid_mass_array
1584
1631
 
1585
1632
  def initialize(hash=nil)
1586
- instance_var_set_from_hash(hash)
1633
+ @mod_nterm_mass = nil
1634
+ @mod_cterm_mass = nil
1635
+ if hash
1636
+ instance_var_set_from_hash(hash)
1637
+ end
1587
1638
  end
1588
1639
 
1589
1640
  # Will escape any xml special chars in modified_peptide
@@ -1621,3 +1672,4 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
1621
1672
 
1622
1673
  end
1623
1674
 
1675
+