ms-sequest 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +26 -10
- data/Gemfile +4 -1
- data/Gemfile.lock +17 -2
- data/VERSION +1 -1
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +1 -1
- data/lib/ms/sequest/bioworks.rb +2 -2
- data/lib/ms/sequest/params.rb +0 -20
- data/lib/ms/sequest/pepxml.rb +7 -245
- data/lib/ms/sequest/pepxml/modifications.rb +247 -0
- data/lib/ms/sequest/pepxml/params.rb +32 -0
- data/lib/ms/sequest/sqt.rb +17 -17
- data/lib/ms/sequest/srf.rb +64 -54
- data/lib/ms/sequest/srf/pepxml.rb +316 -0
- data/lib/ms/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/ms/sequest/srf/sqt.rb +1 -1
- data/spec/ms/sequest/bioworks_spec.rb +11 -11
- data/spec/ms/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/ms/sequest/pepxml_spec.rb +0 -65
- data/spec/ms/sequest/srf/pepxml_spec.rb +84 -0
- data/spec/ms/sequest/srf_spec.rb +3 -3
- data/spec/ms/sequest/srf_spec_helper.rb +2 -2
- data/spec/spec_helper.rb +17 -18
- metadata +73 -19
@@ -0,0 +1,316 @@
|
|
1
|
+
require 'ms/ident/pepxml'
|
2
|
+
require 'ms/ident/pepxml/spectrum_query'
|
3
|
+
require 'ms/ident/pepxml/search_result'
|
4
|
+
require 'ms/ident/pepxml/search_hit'
|
5
|
+
require 'ms/msrun'
|
6
|
+
require 'ms/sequest/srf'
|
7
|
+
require 'ms/sequest/pepxml'
|
8
|
+
|
9
|
+
class Ms::Sequest::Srf
|
10
|
+
module Pepxml
|
11
|
+
|
12
|
+
# A hash with the following *symbol* keys may be set:
|
13
|
+
#
|
14
|
+
# Run Info
|
15
|
+
# *:ms_model*:: nil
|
16
|
+
# *:ms_ionization*:: 'ESI'
|
17
|
+
# *:ms_detector*:: 'UNKNOWN'
|
18
|
+
# *:ms_mass_analyzer*:: nil - <i>typically extracted from the srf file and matched with <b>ModelToMsAnalyzer</b></i>
|
19
|
+
# *:ms_manufacturer*:: 'Thermo'
|
20
|
+
#
|
21
|
+
# Raw data
|
22
|
+
# *:mz_dir*:: nil - <i>path to the mz[X]ML directory, defaults to the directory the srf file is contained in. mz[X]ML data must be available to embed retention times</i>
|
23
|
+
# *:raw_data*:: \['.mzML', '.mzXML'\] - <i>preferred extension for raw data</i>
|
24
|
+
#
|
25
|
+
# Database
|
26
|
+
# *:db_seq_type*:: 'AA' - <i>AA or NA</i>
|
27
|
+
# *:db_dir*:: nil - <i>the directory the fasta file used for the search is housed in. A valid pepxml file must point to a valid fasta file!</i>
|
28
|
+
# *:db_residue_size*:: nil - <i>An integer for the number of residues in the database. if true, calculates the size of the fasta database.</i>
|
29
|
+
# *:db_name*:: nil
|
30
|
+
# *:db_orig_database_url*:: nil
|
31
|
+
# *:db_release_date*:: nil
|
32
|
+
# *:db_release_identifier*:: nil
|
33
|
+
#
|
34
|
+
# Search Hits
|
35
|
+
# *:num_hits*:: 1 - <i>the top number of hits to include</i>
|
36
|
+
# *:retention_times*:: false - <i>include retention times in the file (requires mz_dir to be set)</i>
|
37
|
+
# *:deltacn_orig*:: false - <i>when true, the original SEQUEST deltacn values are used. If false, Bioworks deltacn values are used which are derived by taking the original deltacn of the following hit. This gives the top ranking hit an informative deltacn but makes the deltacn meaningless for other hits.</i>
|
38
|
+
#
|
39
|
+
# *:pepxml_version*:: Ms::Ident::Pepxml::DEFAULT_PEPXML_VERSION, - <i>Integer to set the pepxml version. The converter and xml output attempts to produce xml specific to the version.</i>
|
40
|
+
# *:verbose*:: true - <i>set to false to quiet warnings</i>
|
41
|
+
# Default values for every option accepted by to_pepxml. Callers pass a hash
# of symbol keys that is merged over these defaults.
DEFAULT_OPTIONS = {
  # -- run info --
  :ms_model             => nil,
  :ms_ionization        => 'ESI',
  :ms_detector          => 'UNKNOWN',
  :ms_mass_analyzer     => nil,
  :ms_manufacturer      => 'Thermo',

  # -- raw data (extensions are tried in the order listed) --
  :mz_dir               => nil,
  :raw_data             => ['.mzML', '.mzXML'],

  # -- database --
  :db_seq_type           => 'AA',
  :db_dir                => nil,
  :db_residue_size       => nil,
  :db_name               => nil,
  :db_orig_database_url  => nil,
  :db_release_date       => nil,
  :db_release_identifier => nil,

  # -- search hits --
  :num_hits             => 1,
  :retention_times      => false,
  :deltacn_orig         => false,

  # -- output --
  :pepxml_version       => Ms::Ident::Pepxml::DEFAULT_PEPXML_VERSION,
  :verbose              => true,
}
|
67
|
+
|
68
|
+
# Ordered [regexp, mass analyzer] pairs. Each regexp is tried in turn against
# the instrument model (srf.header.model); the string paired with the first
# regexp that matches is used as the mass analyzer, so the catch-all entry
# must stay last.
#
#     /Orbitrap/::    'Orbitrap'
#     /LCQ Deca XP/:: 'Ion Trap'
#     /LTQ/::         'Ion Trap'
#     /\w+/::         'UNKNOWN'
ModelToMsAnalyzer = [
  [/Orbitrap/,    'Orbitrap'],
  [/LCQ Deca XP/, 'Ion Trap'],
  [/LTQ/,         'Ion Trap'],
  [/\w+/,         'UNKNOWN'],
]
|
82
|
+
|
83
|
+
# returns an Ms::Ident::Pepxml object. See that object for creating an
|
84
|
+
# xml string or writing to file.
|
85
|
+
# Builds and returns an Ms::Ident::Pepxml object from this Srf object. See
# that object for creating an xml string or writing to file.
#
# opts:: a hash of *symbol* keys merged over DEFAULT_OPTIONS
#
# Side effects: when the fasta database cannot be found a warning block is
# printed with +puts+ (only if opt[:verbose]); when :retention_times is
# requested but no raw data file is found in :mz_dir, a message is emitted
# with +warn+ and retention times are turned off.
def to_pepxml(opts={})
  opt = DEFAULT_OPTIONS.merge(opts)
  srf = self

  # with newer pepxml version these are not required anymore
  hidden_opts = {
    # format of file storing the runner up peptides (if not present in
    # pepXML) this was made optional after version 19
    :out_data_type => "out", ## may be srf??
    # runner up search hit data type extension (e.g. .tgz)
    :out_data => ".srf",
  }
  opt.merge!(hidden_opts)

  params = srf.params
  header = srf.header

  opt[:ms_model] ||= header.model

  # derive the mass analyzer from the model unless given explicitly; the
  # first matching regexp wins. to_s keeps a missing model from raising.
  unless opt[:ms_mass_analyzer]
    model = opt[:ms_model].to_s
    matched = ModelToMsAnalyzer.find { |regexp, _val| model =~ regexp }
    opt[:ms_mass_analyzer] = matched.last if matched
  end

  # get the database name
  db_filename = header.db_filename.sub(/\.hdr$/, '')
  if opt[:db_dir]
    # keep only the file's basename (windows or unix separators) and look
    # for it in the user supplied directory
    db_filename = File.join(opt[:db_dir], db_filename.split(/[\/\\]+/).last)
  end
  if File.exist?(db_filename)
    db_filename = File.expand_path(db_filename)
  else
    msg = ["!!! WARNING !!!"]
    msg << "!!! Can't find database: #{db_filename}"
    msg << "!!! pepxml *requires* that the db path be valid"
    msg << "!!! make sure 1) the fasta file is available on this system"
    msg << "!!! 2) you've specified a valid directory with --db-dir (or :db_dir)"
    puts msg.join("\n") if opt[:verbose]
  end

  modifications_obj = Ms::Sequest::Pepxml::Modifications.new(params, header.modifications)
  mass_index = params.mass_index(:precursor)
  h_plus = mass_index['h+']

  # narrow :raw_data to the first extension for which a file actually exists
  opt[:mz_dir] ||= srf.resident_dir
  found_ext = opt[:raw_data].find do |raw_data|
    Dir[File.join(opt[:mz_dir], srf.base_name_noext + raw_data)].first
  end
  opt[:raw_data] = [found_ext] if found_ext

  scan_to_ret_time =
    if opt[:retention_times]
      mz_file = Dir[File.join(opt[:mz_dir], srf.base_name_noext + opt[:raw_data].first)].first
      if mz_file
        Ms::Msrun.scans_to_times(mz_file)
      else
        warn "turning retention_times off since no valid mz[X]ML file was found!!!"
        opt[:retention_times] = false
        nil
      end
    end

  summary_xml_filename = srf.base_name_noext + '.xml'

  pepxml = Ms::Ident::Pepxml.new do |msms_pipeline_analysis|
    msms_pipeline_analysis.merge!(:summary_xml => summary_xml_filename, :pepxml_version => opt[:pepxml_version]) do |msms_run_summary|
      # prep the sample enzyme and search_summary
      msms_run_summary.merge!(
        :base_name => File.join(opt[:mz_dir], srf.base_name_noext),
        :ms_manufacturer => opt[:ms_manufacturer],
        :ms_model => opt[:ms_model],
        :ms_ionization => opt[:ms_ionization],
        :ms_mass_analyzer => opt[:ms_mass_analyzer],
        :ms_detector => opt[:ms_detector],
        :raw_data => opt[:raw_data].first,
        :raw_data_type => opt[:raw_data].first,
      ) do |sample_enzyme, search_summary, spectrum_queries|
        sample_enzyme.merge!(params.sample_enzyme_hash)
        search_summary.merge!(
          :base_name=> srf.resident_dir + '/' + srf.base_name_noext,
          :search_engine => 'SEQUEST',
          :precursor_mass_type => params.precursor_mass_type,
          :fragment_mass_type => params.fragment_mass_type,
          :out_data_type => opt[:out_data_type],
          :out_data => opt[:out_data],
        ) do |search_database, enzymatic_search_constraint, modifications_ar, parameters_hash|
          search_database.merge!(:local_path => db_filename, :seq_type => opt[:db_seq_type], :database_name => opt[:db_name], :orig_database_url => opt[:db_orig_database_url], :database_release_date => opt[:db_release_date], :database_release_identifier => opt[:db_release_identifier])

          # an Integer is used as-is; true asks the database object to
          # compute the size itself; anything else leaves it unset
          case opt[:db_residue_size]
          when Integer
            search_database.size_of_residues = opt[:db_residue_size]
          when true
            search_database.set_size_of_residues!
          end

          enzymatic_search_constraint.merge!(
            :enzyme => params.enzyme,
            :max_num_internal_cleavages => params.max_num_internal_cleavages,
            :min_number_termini => params.min_number_termini,
          )
          modifications_ar.replace(modifications_obj.modifications)
          parameters_hash.merge!(params.opts)
        end

        # NOTE(review): bare +index+ resolves on self (the Srf object); each
        # entry is used below as [first_scan, last_scan, charge] -- confirm
        # against the Srf class.
        spec_queries = srf.dta_files.zip(srf.out_files, index).map do |dta_file,out_file,i_ar|
          precursor_neutral_mass = dta_file.mh - h_plus

          search_hits = out_file.hits[0,opt[:num_hits]].each_with_index.map do |pep,i|
            (prev_aa, pure_aaseq, next_aa) = Ms::Ident::Peptide.prepare_sequence(pep.sequence)
            calc_neutral_pep_mass = pep.mh - h_plus
            Ms::Ident::Pepxml::SearchHit.new(
              :hit_rank => i+1,
              :peptide => pure_aaseq,
              :peptide_prev_aa => prev_aa,
              :peptide_next_aa => next_aa,
              :protein => pep.proteins.first.reference.split(' ')[0],
              :num_tot_proteins => pep.proteins.size,
              :num_matched_ions => pep.ions_matched,
              :tot_num_ions => pep.ions_total,
              :calc_neutral_pep_mass => calc_neutral_pep_mass,
              :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
              :num_tol_term => sample_enzyme.num_tol_term(prev_aa, pure_aaseq, next_aa),
              :num_missed_cleavages => sample_enzyme.num_missed_cleavages(pure_aaseq),
              :modification_info => modifications_obj.modification_info(Ms::Ident::Peptide.split_sequence(pep.sequence)[1])
            ) do |search_scores|
              if opt[:deltacn_orig]
                deltacn = pep.deltacn_orig
                deltacnstar = nil
              else
                deltacn = pep.deltacn
                # a deltacn of 1.1 is reported as 1.0
                deltacn = 1.0 if deltacn == 1.1
                # '1' when there is no following hit, '0' otherwise.
                # (was assigned to the misspelt `deltcnstar`, so the score
                # was never written)
                deltacnstar = out_file.hits[i+1].nil? ? '1' : '0'
              end
              # score key is :deltacn (was misspelt :deltcn), matching the
              # member name of the SearchHit::Sequest struct
              search_scores.merge!( :xcorr => pep.xcorr, :deltacn => deltacn,
                                    :spscore => pep.sp, :sprank => pep.rsp)
              search_scores[:deltacnstar] = deltacnstar if deltacnstar
            end
          end

          sr = Ms::Ident::Pepxml::SearchResult.new(:search_hits => search_hits)

          ret_time =
            if opt[:retention_times]
              (first_scan, last_scan) = i_ar[0,2]
              if first_scan==last_scan
                scan_to_ret_time[first_scan]
              else
                # average the times of every scan in the range that has one;
                # nil (instead of an exception) when none of them do
                times = (first_scan..last_scan).map {|scan| scan_to_ret_time[scan] }.compact
                times.inject(&:+) / times.size.to_f unless times.empty?
              end
            end
          Ms::Ident::Pepxml::SpectrumQuery.new(
            :spectrum => [srf.base_name_noext, *i_ar].join('.'), :start_scan => i_ar[0], :end_scan => i_ar[1],
            :precursor_neutral_mass => precursor_neutral_mass, :assumed_charge => i_ar[2],
            :retention_time_sec => ret_time,
            :search_results => [sr],
          )
        end
        spectrum_queries.replace(spec_queries)
      end
    end
  end
  pepxml
end # to_pepxml
|
253
|
+
end # Srf::Pepxml
|
254
|
+
include Pepxml
|
255
|
+
end # Srf
|
256
|
+
|
257
|
+
|
258
|
+
require 'trollop'
|
259
|
+
|
260
|
+
module Ms::Sequest::Srf::Pepxml
|
261
|
+
# Command line driver: converts each srf file named in +argv+ into a pepxml
# file.
#
# argv::     array of command line arguments (options followed by srf files);
#            options are consumed by the parser, leaving the file names
# progname:: program name shown in the usage banner
#
# For each srf file the output is written via pepxml.to_xml(outdir), where
# outdir is the matching --outdirs entry or the directory of the srf file.
def self.commandline(argv, progname=$0)
  opts = Trollop::Parser.new do
    banner %Q{
      usage: #{progname} [OPTIONS] <file>.srf ...
      output: <file>.xml ...
    }.lines.map(&:lstrip).join

    text ""
    text "major options:"
    opt :db_dir, "The dir holding the DB if different than in Srf. (pepxml requires a valid database path)", :type => :string
    opt :mz_dir, "directory holding mz[X]ML files (defaults to the folder holding the srf file)", :type => :string
    opt :retention_times, "include retention times (requires mz-dir)"
    opt :deltacn_orig, "use original deltacn values created by SEQUEST. By default, the top hit gets the next hit's original deltacn."
    opt :no_filter, "do not filter hits by peptide_mass_tolerance (per sequest params)"
    opt :num_hits, "include N top hits", :default => 1
    opt :outdirs, "list of output directories", :type => :strings
    opt :quiet, "do not print warnings, etc."

    text ""
    text "minor options:"
    opt :ms_model, 'mass spectrometer model', :type => :string
    opt :ms_ionization, 'type of ms ionization', :default => 'ESI'
    opt :ms_detector, 'ms detector', :default => 'UNKNOWN'
    opt :ms_mass_analyzer, 'ms mass analyzer', :type => :string
    opt :ms_manufacturer, 'ms manufacturer', :default => 'Thermo'
    opt :raw_data, 'preferred extension for raw data', :default => '.mzXML'
    opt :db_seq_type, "'AA' or 'NA'", :default => 'AA'
    opt :db_residue_size, 'calculate the size of the fasta file'
    opt :db_name, 'the database name', :type => :string
    opt :db_orig_database_url, 'original database url', :type => :string
    opt :db_release_date, 'database release date', :type => :string
    opt :db_release_identifier, 'the database release identifier', :type => :string
  end

  # NOTE(review): the parser is driven directly rather than through
  # Trollop::options, so help/usage errors from #parse are presumably raised
  # as exceptions here -- confirm against the Trollop version in use.
  opt = opts.parse argv
  opts.educate && exit if argv.empty?

  # use the parser's own #die: the module-level Trollop.die is only usable
  # after Trollop::options has been called
  opts.die :outdirs, "outdirs must be same size as number of input files" if opt.outdirs && opt.outdirs.size != argv.size

  # pull the driver-only settings out of the option hash ONCE, before the
  # loop, so that every srf file (not just the first) gets the same filter
  # setting and to_pepxml only receives options it knows about
  filter = !opt.delete(:no_filter)
  outdirs = opt.delete(:outdirs) || []
  opt[:raw_data] = [opt[:raw_data]] if opt[:raw_data]
  opt[:verbose] = !opt[:quiet]

  argv.zip(outdirs) do |srf_file,outdir|
    outdir ||= File.dirname(srf_file)
    srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => filter)
    pepxml = srf.to_pepxml(opt)
    outfile = pepxml.to_xml(outdir)
    puts "wrote file: #{outfile}" if opt[:verbose]
  end
end
|
312
|
+
end
|
313
|
+
|
314
|
+
|
315
|
+
|
316
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# make sure the enclosing namespaces exist before reopening Pepxml
module Ms
  module Ident
  end
end

class Ms::Ident::Pepxml
  class SearchHit
    # The SEQUEST search scores attached to a search hit.
    Sequest = Struct.new(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank) do

      # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
      def self.split_ions(ions)
        ions.split('/').map(&:to_i)
      end

      # Emits one search_score element per member (in member order) on the
      # given builder, passing :name (the member) and :value. Returns nil.
      def to_xml(builder)
        members.each_with_index do |score_name, position|
          builder.search_score(:name => score_name, :value => self[position])
        end
        nil
      end
    end
  end
end
|
21
|
+
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -159,7 +159,7 @@ module Ms
|
|
159
159
|
end
|
160
160
|
# note that the rank is determined by the order..
|
161
161
|
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
162
|
-
hit.
|
162
|
+
hit.proteins.each do |prot|
|
163
163
|
out.puts ['L', prot.first_entry].join("\t")
|
164
164
|
end
|
165
165
|
end
|
@@ -13,10 +13,10 @@ describe Bioworks, 'set from an xml file' do
|
|
13
13
|
it 'can set one with labeled proteins' do
|
14
14
|
file = Tfiles + "/bioworks_with_INV_small.xml"
|
15
15
|
obj = Bioworks.new(file)
|
16
|
-
obj.
|
16
|
+
obj.proteins.size.should == 19
|
17
17
|
file = Tfiles + '/bioworks_small.xml'
|
18
18
|
obj = Bioworks.new(file)
|
19
|
-
obj.
|
19
|
+
obj.proteins.size.should == 106
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'can parse an xml file NOT derived from multi-concensus' do
|
@@ -28,10 +28,10 @@ describe Bioworks, 'set from an xml file' do
|
|
28
28
|
obj.global_filename.should == gfn
|
29
29
|
obj.origfilename.should == origfilename
|
30
30
|
obj.origfilepath.should == origfilepath
|
31
|
-
obj.
|
32
|
-
obj.
|
33
|
-
obj.
|
34
|
-
obj.
|
31
|
+
obj.proteins.size.should == 7
|
32
|
+
obj.proteins.first.peptides.first.base_name.should == gfn
|
33
|
+
obj.proteins.first.peptides.first.file.should == "152"
|
34
|
+
obj.proteins.first.peptides.first.charge.should == 2
|
35
35
|
# @TODO: add more tests here
|
36
36
|
end
|
37
37
|
|
@@ -57,7 +57,7 @@ describe Bioworks, 'set from an xml file' do
|
|
57
57
|
def _assert_equal_pieces(exp, act, prot)
|
58
58
|
# equal as floats (by delta)
|
59
59
|
exp.each_index do |i|
|
60
|
-
if i == 5 # both
|
60
|
+
if i == 5 # both proteins and peptides
|
61
61
|
act[i].to_f.should be_close(exp[i].to_f, 0.1)
|
62
62
|
elsif i == 3 && !prot
|
63
63
|
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
@@ -99,7 +99,7 @@ describe Bioworks, 'set from an xml file' do
|
|
99
99
|
end
|
100
100
|
exp_peps = exp_peps.zip(exp_prots)
|
101
101
|
exp_peps.collect! do |both|
|
102
|
-
both[0].
|
102
|
+
both[0].proteins = [both[1]]
|
103
103
|
both[0]
|
104
104
|
end
|
105
105
|
|
@@ -107,8 +107,8 @@ describe Bioworks, 'set from an xml file' do
|
|
107
107
|
pep = Bioworks::Pep.new
|
108
108
|
pep.charge = arr[0]
|
109
109
|
pep.sequence = arr[1]
|
110
|
-
pep.
|
111
|
-
pep.
|
110
|
+
pep.proteins = [Bioworks::Prot.new]
|
111
|
+
pep.proteins.first.reference = "#{cnt}"
|
112
112
|
cnt += 1
|
113
113
|
pep
|
114
114
|
end
|
@@ -130,7 +130,7 @@ end
|
|
130
130
|
|
131
131
|
describe Bioworks::Pep do
|
132
132
|
it 'can be initialized from a hash' do
|
133
|
-
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :
|
133
|
+
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :proteins => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_proteins => 17, :_first_prot => 18}
|
134
134
|
pep = Bioworks::Pep.new(hash)
|
135
135
|
hash.each do |k,v|
|
136
136
|
pep.send(k).should == v
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/sequest/params'
|
4
|
+
require 'ms/sequest/pepxml/modifications'
|
5
|
+
|
6
|
+
describe 'Ms::Sequest::Pepxml::Modifications' do
  # the modification string handed to Modifications.new in every example
  mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "

  before do
    # The params object here is completely unnecessary for this test, except
    # that it sets up the mass table
    @params = Ms::Sequest::Params.new(TESTFILES + "/bioworks32.params")
    @obj = Ms::Sequest::Pepxml::Modifications.new(@params, mod_string)
  end

  it 'creates a mod_symbols_hash' do
    expected_symbols = {
      [:C, 12.0]   => "^",
      [:S, 80.0]   => "@",
      [:M, 29.0]   => "#",
      [:M, 15.9]   => "*",
      [:ct, 12.33] => "[",
      [:nt, 14.2]  => "]",
    }
    @obj.mod_symbols_hash.should == expected_symbols
    ## need more here
  end

  it 'creates a ModificationInfo object given a special peptide sequence' do
    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
    @params.term_diff_search_options = "14.20000 12.33000"
    mod = Ms::Sequest::Pepxml::Modifications.new(@params, mod_string)

    ## no mods
    ok mod.modification_info("PEPTIDE").nil?

    ## terminal and internal mods
    modinfo = mod.modification_info("]M*EC^S@IDM#M*EMSCM[")

    xml_string = modinfo.to_xml
    [/<mod_aminoacid_mass /, /mod_nterm_mass=/, /mod_cterm_mass=/, /modified_peptide=/].each do |pattern|
      xml_string.matches pattern
    end

    modinfo.mod_aminoacid_masses.size.is 5
    # positions are verified, masses are just frozen
    expected_mods = [
      [1, 147.09606],
      [3, 115.1429],
      [4, 167.0772999],
      [7, 160.19606],
      [8, 147.09606],
    ]
    expected_mods.zip(modinfo.mod_aminoacid_masses) do |(pos, mass), mod_aa_mass|
      mod_aa_mass.position.is pos
      mod_aa_mass.mass.should.be.close mass, 0.0001
    end

    # These values are just frozen and not independently verified yet
    modinfo.mod_nterm_mass.should.be.close 146.4033, 0.0001
    modinfo.mod_cterm_mass.should.be.close 160.5334, 0.0001
  end

end
|
50
|
+
|