ms-sequest 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +2 -2
- data/README.rdoc +20 -21
- data/Rakefile +5 -5
- data/VERSION +1 -1
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/params.rb +8 -8
- data/lib/ms/sequest/pepxml/modifications.rb +10 -10
- data/lib/ms/sequest/pepxml/params.rb +6 -6
- data/lib/ms/sequest/sqt.rb +36 -38
- data/lib/ms/sequest/srf.rb +46 -134
- data/lib/ms/sequest/srf/pepxml.rb +23 -23
- data/lib/ms/sequest/srf/pepxml/sequest.rb +3 -3
- data/lib/ms/sequest/srf/search.rb +5 -5
- data/lib/ms/sequest/srf/sqt.rb +13 -15
- data/spec/ms/sequest/params_spec.rb +1 -1
- data/spec/ms/sequest/pepxml/modifications_spec.rb +4 -4
- data/spec/ms/sequest/sqt_spec.rb +2 -28
- data/spec/ms/sequest/srf/pepxml_spec.rb +2 -2
- data/spec/ms/sequest/srf/search_spec.rb +5 -5
- data/spec/ms/sequest/srf/sqt_spec.rb +7 -8
- data/spec/ms/sequest/srf_spec.rb +7 -7
- data/spec/spec_helper.rb +1 -1
- metadata +28 -33
- data/lib/ms/sequest/bioworks.rb +0 -498
- data/spec/ms/sequest/bioworks_spec.rb +0 -153
data/lib/ms/sequest/srf.rb
CHANGED
@@ -11,16 +11,14 @@ require 'ms/ident/protein'
|
|
11
11
|
require 'ms/sequest/params'
|
12
12
|
|
13
13
|
|
14
|
-
module
|
15
|
-
module
|
16
|
-
|
17
|
-
class Ms::Sequest::Srf < Ms::Ident::Search
|
14
|
+
module MS ; end
|
15
|
+
module MS::Sequest ; end
|
18
16
|
|
17
|
+
class MS::Sequest::Srf < MS::Ident::Search
|
19
18
|
class NoSequestParamsError < ArgumentError
|
20
19
|
end
|
21
20
|
|
22
|
-
|
23
|
-
# inherits peptides and proteins from Search
|
21
|
+
# inherits peptide_hits from Search
|
24
22
|
|
25
23
|
# a String: 3.5, 3.3 or 3.2
|
26
24
|
attr_accessor :version
|
@@ -48,7 +46,7 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
48
46
|
attr_accessor :filtered_by_precursor_mass_tolerance
|
49
47
|
|
50
48
|
def protein_class
|
51
|
-
|
49
|
+
MS::Sequest::Srf::Out::Protein
|
52
50
|
end
|
53
51
|
|
54
52
|
# returns a Sequest::Params object or nil if none
|
@@ -65,7 +63,7 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
65
63
|
if sequest_start_from_last_half = last_half.rindex('[SEQUEST]')
|
66
64
|
params_start_index = sequest_start_from_last_half + halfway
|
67
65
|
handle.seek(params_start_index)
|
68
|
-
params =
|
66
|
+
params = MS::Sequest::Params.new.parse_io(handle)
|
69
67
|
finish_parsing_io_pos = handle.pos
|
70
68
|
else
|
71
69
|
nil # not found
|
@@ -85,24 +83,14 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
85
83
|
|
86
84
|
# opts:
|
87
85
|
# :filter_by_precursor_mass_tolerance => true | false (default true)
|
88
|
-
#
|
89
|
-
#
|
90
|
-
#
|
91
|
-
# :link_protein_hits => true | false (default true)
|
92
|
-
# # if true, generates the @protein attribute for the :protein method
|
93
|
-
# # and creates one protein per reference that is linked to each
|
94
|
-
# # relevant peptide hit.
|
95
|
-
# # if false, each protein for each peptide hit is a unique object
|
96
|
-
# # and the :proteins method returns nil. If you are merging multiple
|
97
|
-
# # searches then you probably want to set this to false to avoid
|
98
|
-
# # recalculation.
|
86
|
+
# this will filter by the sequest params prec tolerance as is
|
87
|
+
# typically done by the Bioworks software.
|
99
88
|
#
|
100
89
|
# :read_pephits => true | false (default true)
|
101
|
-
#
|
102
|
-
#
|
90
|
+
# will attempt to read peptide hit information (equivalent to .out
|
91
|
+
# files), otherwise, just reads the dta information.
|
103
92
|
def initialize(filename=nil, opts={})
|
104
93
|
@peptide_hits = []
|
105
|
-
|
106
94
|
@dta_files = []
|
107
95
|
@out_files = []
|
108
96
|
if filename
|
@@ -111,12 +99,12 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
111
99
|
end
|
112
100
|
|
113
101
|
|
114
|
-
# 1. updates the out_file's list of hits based on passing
|
102
|
+
# 1. updates the out_file's list of hits based on passing peptide_hits (but not
|
115
103
|
# the original hit id; rank is implicit in array ordering)
|
116
104
|
# 2. recalculates deltacn values completely if number of hits changed (does
|
117
105
|
# not touch deltacn orig)
|
118
106
|
#
|
119
|
-
# This can spoil proper protein -> peptide linkages.
|
107
|
+
# This can spoil proper protein -> peptide linkages. MS::Id::Search.merge!
|
120
108
|
# should be run after this method to ensure correct protein -> peptide
|
121
109
|
# linkages.
|
122
110
|
def filter_by_precursor_mass_tolerance!
|
@@ -151,7 +139,7 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
151
139
|
end
|
152
140
|
if hits.size != before
|
153
141
|
out_file.hits = hits # <- is this necessary
|
154
|
-
|
142
|
+
MS::Sequest::Srf::Out::Peptide.update_deltacns_from_xcorr(hits)
|
155
143
|
out_file.num_hits = hits.size
|
156
144
|
end
|
157
145
|
end
|
@@ -165,9 +153,9 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
165
153
|
fh.pos = start
|
166
154
|
|
167
155
|
num_files.times do |i|
|
168
|
-
dta_files[i] =
|
156
|
+
dta_files[i] = MS::Sequest::Srf::Dta.from_io(fh, unpack_35)
|
169
157
|
#p dta_files[i]
|
170
|
-
out_files[i] =
|
158
|
+
out_files[i] = MS::Sequest::Srf::Out.from_io(fh, unpack_35, dup_refs_gt_0)
|
171
159
|
#p out_files[i]
|
172
160
|
end
|
173
161
|
[dta_files, out_files]
|
@@ -177,9 +165,9 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
177
165
|
# opts are the same as for 'new'
|
178
166
|
def from_file(filename, opts)
|
179
167
|
@resident_dir = File.dirname(File.expand_path(filename))
|
180
|
-
opts = { :filter_by_precursor_mass_tolerance => true, :
|
168
|
+
opts = { :filter_by_precursor_mass_tolerance => true, :read_pephits => true}.merge(opts)
|
181
169
|
|
182
|
-
(@params, after_params_io_pos) =
|
170
|
+
(@params, after_params_io_pos) = MS::Sequest::Srf.get_sequest_params_and_finish_pos(filename)
|
183
171
|
return unless @params
|
184
172
|
|
185
173
|
dup_references = 0
|
@@ -204,7 +192,7 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
204
192
|
end
|
205
193
|
|
206
194
|
File.open(filename, 'rb') do |fh|
|
207
|
-
@header =
|
195
|
+
@header = MS::Sequest::Srf::Header.from_io(fh)
|
208
196
|
@version = @header.version
|
209
197
|
|
210
198
|
unpack_35 = case @version
|
@@ -283,13 +271,6 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
283
271
|
end
|
284
272
|
|
285
273
|
filter_by_precursor_mass_tolerance! if params
|
286
|
-
|
287
|
-
if opts[:link_protein_hits]
|
288
|
-
(@peptide_hits, @proteins) = merge!([self.peptide_hits]) do |_protein, _peptides|
|
289
|
-
Ms::Sequest::Srf::Out::Protein.new(_protein.reference, _peptides)
|
290
|
-
end
|
291
|
-
end
|
292
|
-
|
293
274
|
end
|
294
275
|
|
295
276
|
self
|
@@ -323,7 +304,7 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
323
304
|
fh.pos = start
|
324
305
|
|
325
306
|
header.num_dta_files.times do |i|
|
326
|
-
dta_files[i] =
|
307
|
+
dta_files[i] = MS::Sequest::Srf::Dta.from_io(fh, unpack_35)
|
327
308
|
end
|
328
309
|
dta_files
|
329
310
|
end
|
@@ -333,14 +314,14 @@ class Ms::Sequest::Srf < Ms::Ident::Search
|
|
333
314
|
def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
|
334
315
|
out_files = Array.new(number_files)
|
335
316
|
header.num_dta_files.times do |i|
|
336
|
-
out_files[i] =
|
317
|
+
out_files[i] = MS::Sequest::Srf::Out.from_io(fh, unpack_35, dup_refs_gt_0)
|
337
318
|
end
|
338
319
|
out_files
|
339
320
|
end
|
340
321
|
|
341
322
|
end
|
342
323
|
|
343
|
-
class
|
324
|
+
class MS::Sequest::Srf::Header
|
344
325
|
|
345
326
|
Start_byte = {
|
346
327
|
:enzyme => 438,
|
@@ -369,7 +350,7 @@ class Ms::Sequest::Srf::Header
|
|
369
350
|
}
|
370
351
|
|
371
352
|
attr_accessor :version
|
372
|
-
# a
|
353
|
+
# a MS::Sequest::Srf::DtaGen object
|
373
354
|
attr_accessor :dta_gen
|
374
355
|
attr_accessor :enzyme
|
375
356
|
attr_accessor :ion_series
|
@@ -401,7 +382,7 @@ class Ms::Sequest::Srf::Header
|
|
401
382
|
def from_io(fh)
|
402
383
|
st = fh.read(4)
|
403
384
|
@version = '3.' + st.unpack('I').first.to_s
|
404
|
-
@dta_gen =
|
385
|
+
@dta_gen = MS::Sequest::Srf::DtaGen.from_io(fh)
|
405
386
|
# if the start_mass end_mass start_scan and end_scan are all zero, it's a
|
406
387
|
# combined srf file:
|
407
388
|
@combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
|
@@ -438,7 +419,7 @@ class Ms::Sequest::Srf::Header
|
|
438
419
|
end
|
439
420
|
|
440
421
|
# the Dta Generation Params
|
441
|
-
class
|
422
|
+
class MS::Sequest::Srf::DtaGen
|
442
423
|
|
443
424
|
## not sure if this is correct
|
444
425
|
# Float
|
@@ -479,9 +460,9 @@ end
|
|
479
460
|
# total_num_possible_charge_states is not correct under 3.5 (Bioworks 3.3.1)
|
480
461
|
# unknown is, well unknown...
|
481
462
|
|
482
|
-
|
463
|
+
MS::Sequest::Srf::Dta = Struct.new( *%w(mh dta_tic num_peaks charge ms_level unknown total_num_possible_charge_states peaks).map(&:to_sym) )
|
483
464
|
|
484
|
-
class
|
465
|
+
class MS::Sequest::Srf::Dta
|
485
466
|
# original
|
486
467
|
# Unpack = "EeIvvvv"
|
487
468
|
Unpack_32 = "EeIvvvv"
|
@@ -496,7 +477,7 @@ class Ms::Sequest::Srf::Dta
|
|
496
477
|
def inspect
|
497
478
|
peaks_st = 'nil'
|
498
479
|
if self[7] ; peaks_st = "[#{self[7].size} bytes]" end
|
499
|
-
"<
|
480
|
+
"<MS::Sequest::Srf::Dta @mh=#{mh} @dta_tic=#{dta_tic} @num_peaks=#{num_peaks} @charge=#{charge} @ms_level=#{ms_level} @total_num_possible_charge_states=#{total_num_possible_charge_states} @peaks=#{peaks_st} >"
|
500
481
|
end
|
501
482
|
|
502
483
|
def self.from_io(fh, unpack_35)
|
@@ -543,12 +524,12 @@ class Ms::Sequest::Srf::Dta
|
|
543
524
|
end
|
544
525
|
|
545
526
|
|
546
|
-
#
|
547
|
-
|
527
|
+
#MS::Sequest::Srf::Out = Struct.new( *%w(first_scan last_scan charge num_hits computer date_time hits total_inten lowest_sp num_matched_peptides db_locus_count).map(&:to_sym) )
|
528
|
+
MS::Sequest::Srf::Out = Struct.new( *%w(num_hits computer date_time total_inten lowest_sp num_matched_peptides db_locus_count hits first_scan last_scan charge).map(&:to_sym) )
|
548
529
|
|
549
530
|
# 0=num_hits, 1=computer, 2=date_time, 3=total_inten, 4=lowest_sp, 5=num_matched_peptides, 6=db_locus_count, 7=hits, 8=first_scan, 9=last_scan, 10=charge
|
550
531
|
|
551
|
-
class
|
532
|
+
class MS::Sequest::Srf::Out
|
552
533
|
Unpack_32 = '@36vx2Z*@60Z*'
|
553
534
|
Unpack_35 = '@36vx4Z*@62Z*'
|
554
535
|
|
@@ -560,10 +541,10 @@ class Ms::Sequest::Srf::Out
|
|
560
541
|
else
|
561
542
|
''
|
562
543
|
end
|
563
|
-
"<
|
544
|
+
"<MS::Sequest::Srf::Out first_scan=#{first_scan}, last_scan=#{last_scan}, charge=#{charge}, num_hits=#{num_hits}, computer=#{computer}, date_time=#{date_time}#{hits_s}>"
|
564
545
|
end
|
565
546
|
|
566
|
-
# returns an
|
547
|
+
# returns an MS::Sequest::Srf::Out object
|
567
548
|
def self.from_io(fh, unpack_35, dup_refs_gt_0)
|
568
549
|
## EMPTY out file is 96 bytes
|
569
550
|
## each hit is 320 bytes
|
@@ -582,16 +563,16 @@ class Ms::Sequest::Srf::Out
|
|
582
563
|
if ar.size > 0
|
583
564
|
num_extra_references = 0
|
584
565
|
_num_hits.times do |i|
|
585
|
-
ar[i] =
|
566
|
+
ar[i] = MS::Sequest::Srf::Out::Peptide.from_io(fh, unpack_35)
|
586
567
|
num_extra_references += ar[i].num_other_loci
|
587
568
|
end
|
588
569
|
if dup_refs_gt_0
|
589
|
-
|
570
|
+
MS::Sequest::Srf::Out::Peptide.read_extra_references(fh, num_extra_references, ar)
|
590
571
|
end
|
591
572
|
## The xcorrs are already ordered by best to worst hit
|
592
573
|
## ADJUST the deltacn's to be meaningful for the top hit:
|
593
574
|
## (the same as bioworks and prophet)
|
594
|
-
|
575
|
+
MS::Sequest::Srf::Out::Peptide.set_deltacn_from_deltacn_orig(ar)
|
595
576
|
end
|
596
577
|
out_obj.hits = ar
|
597
578
|
out_obj[1].chomp! # computer
|
@@ -620,10 +601,10 @@ end
|
|
620
601
|
# num_other_loci is the number of other loci that the peptide matches beyond
|
621
602
|
# the first one listed
|
622
603
|
# srf = the srf object this scan came from
|
623
|
-
|
604
|
+
MS::Sequest::Srf::Out::Peptide = Struct.new( *%w(mh deltacn_orig sf sp xcorr id num_other_loci rsp ions_matched ions_total sequence proteins deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated).map(&:to_sym) )
|
624
605
|
# 0=mh 1=deltacn_orig 2=sf 3=sp 4=xcorr 5=id 6=num_other_loci 7=rsp 8=ions_matched 9=ions_total 10=sequence 11=proteins 12=deltamass 13=ppm 14=aaseq 15=base_name 16=first_scan 17=last_scan 18=charge 19=srf 20=deltacn 21=deltacn_orig_updated
|
625
606
|
|
626
|
-
class
|
607
|
+
class MS::Sequest::Srf::Out::Peptide
|
627
608
|
|
628
609
|
# creates the deltacn that is meaningful for the top hit (the deltacn_orig
|
629
610
|
# or the second best hit and so on).
|
@@ -656,7 +637,7 @@ class Ms::Sequest::Srf::Out::Peptide
|
|
656
637
|
pep = pep_hits[fh.read(8).unpack('x4I').first - 1]
|
657
638
|
|
658
639
|
ref = fh.read(80).unpack('A*').first
|
659
|
-
pep[11] <<
|
640
|
+
pep[11] << MS::Sequest::Srf::Out::Protein.new(ref[0,38])
|
660
641
|
end
|
661
642
|
# fh.read(6) if unpack_35
|
662
643
|
end
|
@@ -695,9 +676,9 @@ class Ms::Sequest::Srf::Out::Peptide
|
|
695
676
|
end
|
696
677
|
st.push('>')
|
697
678
|
st.join(' ')
|
698
|
-
#"<
|
679
|
+
#"<MS::Sequest::Srf::Out::Peptide @mh=#{mh}, @deltacn=#{deltacn}, @sp=#{sp}, @xcorr=#{xcorr}, @id=#{id}, @rsp=#{rsp}, @ions_matched=#{ions_matched}, @ions_total=#{ions_total}, @sequence=#{sequence}, @proteins(count)=#{proteins.size}, @deltamass=#{deltamass}, @ppm=#{ppm} @aaseq=#{aaseq}, @base_name=#{base_name}, @first_scan=#{first_scan}, @last_scan=#{last_scan}, @charge=#{charge}, @srf(base_name)=#{srf.base_name}>"
|
699
680
|
end
|
700
|
-
# extra_references_array is an array that grows with
|
681
|
+
# extra_references_array is an array that grows with peptide_hits as extra
|
701
682
|
# references are discovered.
|
702
683
|
def self.from_io(fh, unpack_35)
|
703
684
|
## get the first part of the info
|
@@ -712,9 +693,9 @@ class Ms::Sequest::Srf::Out::Peptide
|
|
712
693
|
|
713
694
|
# we are slicing the reference to 38 chars to be the same length as
|
714
695
|
# duplicate references
|
715
|
-
peptide[11] = [
|
696
|
+
peptide[11] = [MS::Sequest::Srf::Out::Protein.new(peptide[11][0,38])]
|
716
697
|
|
717
|
-
peptide[14] =
|
698
|
+
peptide[14] = MS::Ident::Peptide.sequence_to_aaseq(peptide[10])
|
718
699
|
|
719
700
|
fh.read(6) if unpack_35
|
720
701
|
|
@@ -723,81 +704,12 @@ class Ms::Sequest::Srf::Out::Peptide
|
|
723
704
|
|
724
705
|
end
|
725
706
|
|
707
|
+
class MS::Sequest::Srf::Out::Protein < MS::Ident::Protein
|
708
|
+
alias_method :reference, :id
|
726
709
|
|
727
|
-
|
728
|
-
|
729
|
-
class Ms::Sequest::Srf::Out::Protein
|
730
|
-
#include Ms::Ident::Protein
|
731
|
-
|
732
|
-
## we shouldn't have to do this because this is inlcuded in SpecID::Protein, but
|
733
|
-
## under some circumstances it won't work without explicitly calling it.
|
734
|
-
#include ProteinReferenceable
|
735
|
-
|
736
|
-
tmp = $VERBOSE ; $VERBOSE = nil
|
737
|
-
def initialize(reference=nil, peptides=[])
|
738
|
-
self[0] = reference
|
739
|
-
self[1] = peptides
|
740
|
-
end
|
741
|
-
$VERBOSE = tmp
|
742
|
-
|
710
|
+
# the first entry
|
743
711
|
def first_entry
|
744
|
-
reference.split(
|
745
|
-
end
|
746
|
-
|
747
|
-
# "<Ms::Sequest::Srf::Out::Protein reference=\"#{@reference}\">"
|
748
|
-
|
749
|
-
undef_method :inspect
|
750
|
-
def inspect
|
751
|
-
"<Ms::Sequest::Srf::Out::Protein @reference=#{reference}, @peptides(#)=#{peptides.size}>"
|
752
|
-
end
|
753
|
-
end
|
754
|
-
|
755
|
-
class Ms::Sequest::SrfGroup
|
756
|
-
include Ms::Ident::SearchGroup
|
757
|
-
|
758
|
-
# inherits an array of Ms::Sequest::Srf::Out::Peptide objects
|
759
|
-
# inherits an array of Ms::Sequest::Srf::Out::Protein objects
|
760
|
-
|
761
|
-
# see Ms::Id::Search for acceptable arguments
|
762
|
-
# (filename, filenames, array of objects)
|
763
|
-
# opts =
|
764
|
-
# :filter_by_precursor_mass_tolerance => true | false (default true)
|
765
|
-
def initialize(arg, opts={}, &block)
|
766
|
-
orig_opts = opts.dup
|
767
|
-
indiv_opts = { :link_protein_hits => false }
|
768
|
-
super(arg, opts.merge(indiv_opts)) do
|
769
|
-
unless orig_opts[:link_protein_hits] == false
|
770
|
-
(@peptides, @proteins) = merge!(@searches.map {|v| v.peptides }) do |_prot, _peps|
|
771
|
-
Ms::Sequest::Srf::Out::Protein.new(_prot.reference, _peps)
|
772
|
-
end
|
773
|
-
end
|
774
|
-
end
|
775
|
-
block.call(self) if block_given?
|
776
|
-
end
|
777
|
-
|
778
|
-
def search_class
|
779
|
-
Ms::Sequest::Srf
|
780
|
-
end
|
781
|
-
|
782
|
-
# returns the filename used
|
783
|
-
# if the file exists, the name will be expanded to full path, otherwise just
|
784
|
-
# what is given
|
785
|
-
def to_srg(srg_filename='bioworks.srg')
|
786
|
-
File.open(srg_filename, 'w') do |v|
|
787
|
-
@filenames.each do |srf_file|
|
788
|
-
if File.exist? srf_file
|
789
|
-
v.puts File.expand_path(srf_file)
|
790
|
-
else
|
791
|
-
v.puts srf_file
|
792
|
-
end
|
793
|
-
end
|
794
|
-
end
|
795
|
-
srg_filename
|
712
|
+
reference.split(' ',2)[0]
|
796
713
|
end
|
797
714
|
end
|
798
715
|
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
@@ -6,7 +6,7 @@ require 'ms/ident/pepxml/search_hit'
|
|
6
6
|
require 'ms/sequest/srf'
|
7
7
|
require 'ms/sequest/pepxml'
|
8
8
|
|
9
|
-
class
|
9
|
+
class MS::Sequest::Srf
|
10
10
|
module Pepxml
|
11
11
|
|
12
12
|
# A hash with the following *symbol* keys may be set:
|
@@ -36,7 +36,7 @@ class Ms::Sequest::Srf
|
|
36
36
|
# *:retention_times*:: false - <i>include retention times in the file (requires mz_dir to be set)</i>
|
37
37
|
# *:deltacn_orig*:: false - <i>when true, the original SEQUEST deltacn values are used. If false, Bioworks deltacn values are used which are derived by taking the original deltacn of the following hit. This gives the top ranking hit an informative deltacn but makes the deltacn meaningless for other hits.</i>
|
38
38
|
#
|
39
|
-
# *:pepxml_version*::
|
39
|
+
# *:pepxml_version*:: MS::Ident::Pepxml::DEFAULT_PEPXML_VERSION, - <i>Integer to set the pepxml version. The converter and xml output attempts to produce xml specific to the version.</i>
|
40
40
|
# *:verbose*:: true - <i>set to false to quiet warnings</i>
|
41
41
|
DEFAULT_OPTIONS = {
|
42
42
|
:ms_model => nil,
|
@@ -61,7 +61,7 @@ class Ms::Sequest::Srf
|
|
61
61
|
:retention_times => false,
|
62
62
|
:deltacn_orig => false,
|
63
63
|
|
64
|
-
:pepxml_version =>
|
64
|
+
:pepxml_version => MS::Ident::Pepxml::DEFAULT_PEPXML_VERSION,
|
65
65
|
:verbose => true,
|
66
66
|
}
|
67
67
|
|
@@ -80,7 +80,7 @@ class Ms::Sequest::Srf
|
|
80
80
|
[/\w+/, 'UNKNOWN'],
|
81
81
|
]
|
82
82
|
|
83
|
-
# returns an
|
83
|
+
# returns an MS::Ident::Pepxml object. See that object for creating an
|
84
84
|
# xml string or writing to file.
|
85
85
|
def to_pepxml(opts={})
|
86
86
|
opt = DEFAULT_OPTIONS.merge(opts)
|
@@ -126,7 +126,7 @@ class Ms::Sequest::Srf
|
|
126
126
|
puts msg.join("\n") if opt[:verbose]
|
127
127
|
end
|
128
128
|
|
129
|
-
modifications_obj =
|
129
|
+
modifications_obj = MS::Sequest::Pepxml::Modifications.new(params, srf.header.modifications)
|
130
130
|
mass_index = params.mass_index(:precursor)
|
131
131
|
h_plus = mass_index['h+']
|
132
132
|
|
@@ -141,7 +141,7 @@ class Ms::Sequest::Srf
|
|
141
141
|
raise NotImplementedError, "will implement shortly"
|
142
142
|
#mz_file = Dir[File.join(opt[:mz_dir], srf.base_name_noext + opt[:raw_data].first)].first
|
143
143
|
#if mz_file
|
144
|
-
#
|
144
|
+
# MS::Msrun.scans_to_times(mz_file)
|
145
145
|
#else
|
146
146
|
# warn "turning retention_times off since no valid mz[X]ML file was found!!!"
|
147
147
|
# opt[:retention_times] = false
|
@@ -151,28 +151,28 @@ class Ms::Sequest::Srf
|
|
151
151
|
|
152
152
|
summary_xml_filename = srf.base_name_noext + '.xml'
|
153
153
|
|
154
|
-
pepxml =
|
154
|
+
pepxml = MS::Ident::Pepxml.new do |msms_pipeline_analysis|
|
155
155
|
msms_pipeline_analysis.merge!(:summary_xml => summary_xml_filename, :pepxml_version => opt[:pepxml_version]) do |msms_run_summary|
|
156
156
|
# prep the sample enzyme and search_summary
|
157
157
|
msms_run_summary.merge!(
|
158
|
-
|
158
|
+
:base_name => File.join(opt[:mz_dir], srf.base_name_noext),
|
159
159
|
:ms_manufacturer => opt[:ms_manufacturer],
|
160
160
|
:ms_model => opt[:ms_model],
|
161
161
|
:ms_ionization => opt[:ms_ionization],
|
162
162
|
:ms_mass_analyzer => opt[:ms_mass_analyzer],
|
163
163
|
:ms_detector => opt[:ms_detector],
|
164
164
|
:raw_data => opt[:raw_data].first,
|
165
|
-
:raw_data_type => opt[:raw_data].first,
|
165
|
+
:raw_data_type => opt[:raw_data].first,
|
166
166
|
) do |sample_enzyme, search_summary, spectrum_queries|
|
167
167
|
sample_enzyme.merge!(params.sample_enzyme_hash)
|
168
168
|
sample_enzyme.name = opt[:enzyme] if opt[:enzyme]
|
169
169
|
search_summary.merge!(
|
170
|
-
|
170
|
+
:base_name=> srf.resident_dir + '/' + srf.base_name_noext,
|
171
171
|
:search_engine => 'SEQUEST',
|
172
172
|
:precursor_mass_type => params.precursor_mass_type,
|
173
173
|
:fragment_mass_type => params.fragment_mass_type,
|
174
174
|
:out_data_type => opt[:out_data_type],
|
175
|
-
:out_data => opt[:out_data],
|
175
|
+
:out_data => opt[:out_data],
|
176
176
|
) do |search_database, enzymatic_search_constraint, modifications_ar, parameters_hash|
|
177
177
|
search_database.merge!(:local_path => db_filename, :seq_type => opt[:db_seq_type], :database_name => opt[:db_name], :orig_database_url => opt[:db_orig_url], :database_release_date => opt[:db_release_date], :database_release_identifier => opt[:db_release_id])
|
178
178
|
|
@@ -184,9 +184,9 @@ class Ms::Sequest::Srf
|
|
184
184
|
end
|
185
185
|
|
186
186
|
enzymatic_search_constraint.merge!(
|
187
|
-
|
187
|
+
:enzyme => opt[:enzyme] ? opt[:enzyme] : params.enzyme,
|
188
188
|
:max_num_internal_cleavages => params.max_num_internal_cleavages,
|
189
|
-
:min_number_termini => params.min_number_termini,
|
189
|
+
:min_number_termini => params.min_number_termini,
|
190
190
|
)
|
191
191
|
modifications_ar.replace(modifications_obj.modifications)
|
192
192
|
parameters_hash.merge!(params.opts)
|
@@ -196,9 +196,9 @@ class Ms::Sequest::Srf
|
|
196
196
|
precursor_neutral_mass = dta_file.mh - h_plus
|
197
197
|
|
198
198
|
search_hits = out_file.hits[0,opt[:num_hits]].each_with_index.map do |pep,i|
|
199
|
-
(prev_aa, pure_aaseq, next_aa) =
|
199
|
+
(prev_aa, pure_aaseq, next_aa) = MS::Ident::Peptide.prepare_sequence(pep.sequence)
|
200
200
|
calc_neutral_pep_mass = pep.mh - h_plus
|
201
|
-
sh =
|
201
|
+
sh = MS::Ident::Pepxml::SearchHit.new(
|
202
202
|
:hit_rank => i+1,
|
203
203
|
:peptide => pure_aaseq,
|
204
204
|
:peptide_prev_aa => prev_aa,
|
@@ -211,7 +211,7 @@ class Ms::Sequest::Srf
|
|
211
211
|
:massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
|
212
212
|
:num_tol_term => sample_enzyme.num_tol_term(prev_aa, pure_aaseq, next_aa),
|
213
213
|
:num_missed_cleavages => sample_enzyme.num_missed_cleavages(pure_aaseq),
|
214
|
-
:modification_info => modifications_obj.modification_info(
|
214
|
+
:modification_info => modifications_obj.modification_info(MS::Ident::Peptide.split_sequence(pep.sequence)[1])
|
215
215
|
) do |search_scores|
|
216
216
|
if opt[:deltacn_orig]
|
217
217
|
deltacn = pep.deltacn_orig
|
@@ -227,7 +227,7 @@ class Ms::Sequest::Srf
|
|
227
227
|
end
|
228
228
|
end
|
229
229
|
|
230
|
-
sr =
|
230
|
+
sr = MS::Ident::Pepxml::SearchResult.new(:search_hits => search_hits)
|
231
231
|
|
232
232
|
ret_time =
|
233
233
|
if opt[:retention_times]
|
@@ -239,11 +239,11 @@ class Ms::Sequest::Srf
|
|
239
239
|
times.inject(&:+) / times.size.to_f
|
240
240
|
end
|
241
241
|
end
|
242
|
-
|
243
|
-
|
242
|
+
MS::Ident::Pepxml::SpectrumQuery.new(
|
243
|
+
:spectrum => [srf.base_name_noext, *i_ar].join('.'), :start_scan => i_ar[0], :end_scan => i_ar[1],
|
244
244
|
:precursor_neutral_mass => dta_file.mh - h_plus, :assumed_charge => i_ar[2],
|
245
245
|
:retention_time_sec => ret_time,
|
246
|
-
:search_results => [sr],
|
246
|
+
:search_results => [sr],
|
247
247
|
)
|
248
248
|
end
|
249
249
|
spectrum_queries.replace(spec_queries)
|
@@ -259,7 +259,7 @@ end # Srf
|
|
259
259
|
|
260
260
|
require 'trollop'
|
261
261
|
|
262
|
-
module
|
262
|
+
module MS::Sequest::Srf::Pepxml
|
263
263
|
def self.commandline(argv, progname=$0)
|
264
264
|
opts = Trollop::Parser.new do
|
265
265
|
banner %Q{
|
@@ -281,7 +281,7 @@ module Ms::Sequest::Srf::Pepxml
|
|
281
281
|
|
282
282
|
text ""
|
283
283
|
text "minor options:"
|
284
|
-
opt :pepxml_version, 'schema version number to use', :default =>
|
284
|
+
opt :pepxml_version, 'schema version number to use', :default => MS::Ident::Pepxml::DEFAULT_PEPXML_VERSION
|
285
285
|
opt :ms_model, 'mass spectrometer model', :type => :string
|
286
286
|
opt :ms_ionization, 'type of ms ionization', :default => 'ESI'
|
287
287
|
opt :ms_detector, 'ms detector', :default => 'UNKNOWN'
|
@@ -307,7 +307,7 @@ module Ms::Sequest::Srf::Pepxml
|
|
307
307
|
|
308
308
|
argv.zip(opt.delete(:outdirs)) do |srf_file,outdir|
|
309
309
|
outdir ||= File.dirname(srf_file)
|
310
|
-
srf =
|
310
|
+
srf = MS::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
|
311
311
|
pepxml = srf.to_pepxml(opt)
|
312
312
|
outfile = pepxml.to_xml(outdir)
|
313
313
|
puts "wrote file: #{outfile}" if opt[:verbose]
|