ms-ident 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,484 @@
1
+ require 'rexml/document'
2
+ require 'hash_by'
3
+ require 'instance_var_set_from_hash'
4
+ require 'axml'
5
+ require 'spec_id'
6
+ require 'arrayclass'
7
+
8
+ require 'spec_id/parser/proph'
9
+
10
+
11
# Namespace stubs so the mixins below resolve even when 'spec_id' has not
# defined them yet.
module SpecID ; end
module SpecID::Prot ; end
module SpecID::Pep ; end

# Readers for ProteinProphet (protXML / "-prot.xml") result files.
module Proph

  # One ProteinProphet result file: its format version and protein groups.
  class ProtSummary
    include SpecID

    # if you get this match it's a protein prophet file and the version is the
    # first match!
    Filetype_and_version_re_old = /ProteinProphet_v([\.\d]+)\.dtd/ # gives 1.9 or what else?
    Filetype_and_version_re_new = /protXML_v([\.\d]+)\.xsd/ # gives 4 right now
    # inherits prots and peps

    # the protein groups
    attr_accessor :prot_groups
    # the format version string detected by get_version
    attr_accessor :version

    # Always true for this file type.
    def hi_prob_best ; true end

    # Detects the file format version by scanning at most the first five
    # lines of +file+ for a protXML schema or ProteinProphet DTD reference.
    #
    # file:: path to the file to inspect
    # Returns the version as a String (first capture of the matching regex).
    # Raises ArgumentError if no version marker is found in those lines.
    def get_version(file)
      answer = nil
      File.open(file) do |fh|
        5.times do
          line = fh.gets
          break unless line # hit EOF: nothing more to inspect
          # the newer schema reference takes precedence over the old DTD one
          found = Filetype_and_version_re_new.match(line) ||
            Filetype_and_version_re_old.match(line)
          if found
            answer = found[1]
            break
          end
        end
      end
      raise(ArgumentError, "couldn't detect version in #{file}") unless answer
      answer
    end

    # file:: optional path; when given, the version is detected and the file
    #        is handed to SpecID::Parser::ProtProph with this object passed
    #        as :spec_id (presumably so the parser fills it in -- confirm
    #        against that parser).
    def initialize(file=nil)
      @prots = nil
      if file
        @version = get_version(file)
        #@prot_groups = ProtSummary::Parser.new.parse_file(file)
        SpecID::Parser::ProtProph.new(:spec_id).parse(file, :spec_id => self)
      end
    end

    # Returns an array with one protein per distinct protein_name, keeping
    # the first protein seen for each name.
    #
    # prot_groups:: objects responding to #prots, each protein responding to
    #               #protein_name
    def unique_prots(prot_groups)
      first_by_name = {}
      prot_groups.each do |pg|
        pg.prots.each do |prt|
          key = prt.protein_name
          first_by_name[key] = prt unless first_by_name.key?(key)
        end
      end
      first_by_name.values
    end

  end

  # Builds ProtGroup objects (each holding Prot objects) from a file or stream.
  class ProtSummary::Parser
    # the groups produced by the most recent parse (nil before any parse)
    attr_accessor :prot_groups

    # file:: optional path; parsed immediately when given
    # with_peps, tp:: forwarded to parse_file
    def initialize(file=nil, with_peps=false, tp='axml')
      if file
        @prot_groups = parse_file(file, with_peps, tp)
      end
    end

    # Opens +file+, parses it and stores the result in #prot_groups.
    # returns an array of protein_groups
    def parse_file(file, with_peps=false, tp='axml')
      File.open(file) do |fh|
        @prot_groups = _parse_for_prot_groups(fh, with_peps, tp)
      end
      @prot_groups
    end

    # returns an array of ProtGroup objects
    #
    # stream::    IO handed to the underlying XML parser
    # with_peps:: accepted for interface compatibility; not used here
    # tp::        parser type; only 'axml' (String or Symbol) is implemented
    # Raises ArgumentError for any other parser type instead of silently
    # returning an empty array.
    def _parse_for_prot_groups(stream, with_peps=false, tp='axml')
      prtgrps = []
      case tp.to_s
      when 'axml'
        root = AXML.parse(stream)
        root.protein_group.each do |protein_group|
          # the block's value becomes the group's protein list
          pg = ProtGroup.new(protein_group.attrs) do
            protein_group.map do |protein|
              Prot.new(protein.attrs)
            end
          end
          prtgrps << pg
        end
      else
        raise ArgumentError, "unsupported parser type: #{tp.inspect}"
      end
      prtgrps
    end
  end # ProtSummary::Parser


  # A group of proteins together with the group's attributes.
  class ProtGroup
    attr_accessor :group_number, :probability, :prots

    # args:: optional hash passed to instance_var_set_from_hash
    # If a block is given, its return value becomes the protein list.
    def initialize(args=nil)
      @prots = []
      if args
        instance_var_set_from_hash(args)
      end
      if block_given?
        @prots = yield
      end
    end
  end

end # Proph
122
+
123
+
124
+
125
# A protein record from a -prot.xml file, stored as an Arrayclass whose
# fields are listed below in order (protein_name is the first field).
Proph::Prot = Arrayclass.new(%w(
  protein_name
  probability
  n_indistinguishable_proteins
  percent_coverage
  unique_stripped_peptides
  group_sibling_id
  total_number_peptides
  pct_spectrum_ids
  description
  peps
))

# note that 'description' is found in the element 'annotation', attribute 'protein_description'
# NOTE!: unique_stripped peptides is an array rather than + joined string
class Proph::Prot
  include SpecID::Prot

  # The three readers below all return element 0 (the protein_name field).

  # returns protein_name
  def name
    self[0]
  end

  # same value as #name
  def reference
    self[0]
  end

  # the name is also the first_entry
  def first_entry
    self[0]
  end

end
138
+
139
+ #def to_s
140
+ # '<Prot: protein_name=' + @protein_name + ' ' + 'probability=' + @probability.to_s + '>'
141
+ #end
142
+
143
+ # this is a pep from a -prot.xml file
144
+
145
# A peptide record from a -prot.xml file, stored as an Arrayclass whose
# fields are listed below in order.
Proph::Prot::Pep = Arrayclass.new(%w(
  peptide_sequence
  charge
  initial_probability
  nsp_adjusted_probability
  weight
  is_nondegenerate_evidence
  n_enzymatic_termini
  n_sibling_peptides
  n_sibling_peptides_bin
  n_instances
  is_contributing_evidence
  calc_neutral_pep_mass
  modification_info
  prots
))

class Proph::Prot::Pep
  include SpecID::Pep

  # short names for the modification_info reader and writer
  alias_method :mod_info, :modification_info
  alias_method :mod_info=, :modification_info=

  # element 0, i.e. the peptide_sequence field
  def aaseq
    self[0]
  end

  # element 3, i.e. the nsp_adjusted_probability field
  def probability
    self[3]
  end

end # class Pep
157
+
158
+ =begin
159
+ #attr_accessor :sequence, :probability, :filenames, :charge, :precursor_neutral_mass, :nsp_cutoff, :scans
160
+ #attr_writer :arithmetic_avg_scan_by_parent_time
161
+
162
+ #def initialize(args=nil)
163
+ # if args
164
+ # @sequence = args[:sequence]
165
+ # @probability = args[:probability] ## nsp prob
166
+ # @filenames = args[:filenames]
167
+ # @charge = args[:charge]
168
+ # @nsp_cutoff = args[:nsp_cutoff]
169
+ # if args.key?(:scans)
170
+ # @scans = args[:scans]
171
+ # else
172
+ # @scans = [] ## this is set later if needed
173
+ # end
174
+ # else
175
+ # @scans = []
176
+ # end
177
+ #end
178
+
179
+ # filter peptides based on the number of scans
180
+ # if a peptide has more than max_dups scans, the peptide is tossed
181
+ # note that multiple scans that were used as a single dtafile scan
182
+ # will be counted as a single scan for these purposes!
183
+ # (easy, since they are stored as a single item in the array of scans)
184
+ def self.filter_by_max_dup_scans(max_dups=nil, peps=nil)
185
+ if max_dups
186
+ new_peps = []
187
+ peps.each do |pep|
188
+ unless pep.scans.size > max_dups
189
+ new_peps << pep
190
+ end
191
+ end
192
+ new_peps
193
+ else
194
+ peps.dup
195
+ end
196
+ end
197
+
198
+
199
+ ## from the list of scans, creates a scan object whose time is the
200
+ ## arithmetic mean of the parent scans (based on prec_inten) and whose
201
+ ## prec_mz is the avg of all prec_mz's. num is nil, charge is the first
202
+ def arithmetic_avg_scan_by_parent_time
203
+ unless @arithmetic_avg_scan_by_parent_time
204
+ flat_scans = @scans.flatten
205
+
206
+ # new_prec_mz
207
+ prec_mz_sum = 0.0
208
+ prec_inten_sum = 0.0
209
+ times = []
210
+ intens = []
211
+ tot_inten = 0.0
212
+ flat_scans.each do |c|
213
+ prec_inten = c.prec_inten
214
+ prec_inten_sum += prec_inten
215
+ prec_mz_sum += c.prec_mz
216
+ tot_inten += prec_inten
217
+ times << c.parent.time
218
+ intens << prec_inten
219
+ end
220
+ new_prec_mz = prec_mz_sum / flat_scans.size
221
+ new_prec_inten = prec_inten_sum / flat_scans.size
222
+
223
+ fraction_inten = []
224
+ intens.each do |inten|
225
+ fraction_inten.push( inten/tot_inten )
226
+ end
227
+
228
+ new_time = 0.0
229
+ (0...times.size).each do |i|
230
+ new_time += times[i] * fraction_inten[i]
231
+ end
232
+
233
+ @arithmetic_avg_scan_by_parent_time = MS::Scan.new( nil, @scans.first.ms_level, new_time, new_prec_mz, new_prec_inten )
234
+
235
+ end
236
+ @arithmetic_avg_scan_by_parent_time
237
+ end
238
+
239
+ def to_s
240
+ '<Pep seq=' + @sequence + ' ' + 'prob=' + @probability.to_s + ' charge=' + @charge + '>'
241
+ end
242
+
243
+ def has_dta?(dta_filename)
244
+ if @filenames
245
+ @filenames.each do |fn|
246
+ if dta_filename == fn
247
+ return true
248
+ end
249
+ end
250
+ end
251
+ return false
252
+ end
253
+
254
+
255
+ # Given a list of peptides, returns only those unique based on
256
+ # sequence/charge
257
+ def self.uniq_by_seqcharge(peptides)
258
+ # @TODO: this could be done with one fewer traversals, but it is beautiful
259
+ peptides.hash_by(:sequence, :charge).collect do |k,v|
260
+ v.first
261
+ end
262
+ end
263
+ =end
264
+
265
+
266
+
267
+
268
+
269
+ =begin
270
+
271
+ # Class for parsing the peptide prophet output files in various ways
272
+ class Proph::Pep::Parser < Parser
273
+
274
+ # parse_type = "rexml" | "regex"
275
+ # regex's are about 50 times faster but are not guaranteed to work
276
+ # seq charge hash is keyed on an array -> [sequence,charge]
277
+ # @TODO: implement parsing on this with xmlparser
278
+ def dta_filenames_by_seq_charge(pep_xml_file, parse_type="rexml")
279
+ seq_charge_hash = Hash.new {|hash,key| hash[key] = [] }
280
+ case parse_type
281
+ when "rexml"
282
+ #puts "READING: " + pep_xml_file + " ..."
283
+ doc = REXML::Document.new File.new(pep_xml_file)
284
+
285
+ ## Create a hash of peptides based on sequence_charge (takes an array)
286
+ doc.elements.each("msms_pipeline_analysis/msms_run_summary/search_result") do |result|
287
+ pep_charge = result.attributes['assumed_charge']
288
+ filename = result.attributes['spectrum']
289
+ result.elements.to_a('search_hit').each do |hit|
290
+ pep_seq = hit.attributes['peptide']
291
+ seq_charge = [pep_seq, pep_charge]
292
+ seq_charge_hash[seq_charge] << filename
293
+ end
294
+ end
295
+ seq_charge_hash
296
+ when "regex"
297
+ #puts "READING: " + pep_xml_file + " ..."
298
+ ## Create a hash of peptides based on sequence_charge (takes an array)
299
+
300
+ ## file from peptideAtlas:
301
+ search_result_regex1 = /<spectrum_query spectrum="(.*\.\d+\.\d+\.\d)".* assumed_charge="(\d)"/o
302
+ search_result_regex2 = /<search_result sxpectrum="(.*\.\d+\.\d+\.\d)".* assumed_charge="(\d)"/o
303
+ search_hit_regex = /<search_hit .*peptide="(\w+)" /o
304
+
305
+ peptide_h = {}
306
+ filename = nil
307
+ pep_charge = nil
308
+ File.open(pep_xml_file).each do |line|
309
+ if line =~ search_result_regex1
310
+ filename = $1.dup
311
+ pep_charge = $2.dup
312
+ elsif line =~ search_result_regex2
313
+ filename = $1.dup
314
+ pep_charge = $2.dup
315
+ end
316
+ if line =~ search_hit_regex
317
+ pep_seq = $1.dup
318
+ seq_charge = [pep_seq, pep_charge]
319
+ seq_charge_hash[seq_charge] << filename
320
+ end
321
+ end
322
+ end
323
+ seq_charge_hash
324
+ end
325
+
326
+ # drops all search_hits that have peptideprophet probability < min_val
327
+ # and drops any search_results that end up with 0 search_hits
328
+ def filter_by_min_pep_prob(file, outfile, min_val)
329
+ root = root_el(file)
330
+
331
+ d_search_hit = nil
332
+ d_search_result = nil
333
+ root.children.each do |child1|
334
+ if child1.name == 'msms_run_summary'
335
+ d_search_result = []
336
+ child1.children.each do |child2|
337
+ if child2.name == 'search_result'
338
+ #puts "size before: " + child2.size.to_s
339
+ d_search_hit = []
340
+ child2.children.each do |child3|
341
+ if child3.name == 'search_hit'
342
+ child3.children.each do |child4|
343
+ if child4.name == 'peptideprophet_result'
344
+ if child4.attrs["probability"].to_f < min_val
345
+ #puts "dropping probability: #{child4.attrs["probability"]}"
346
+ d_search_hit << child3
347
+ else
348
+ #puts "keeping probability: #{child4.attrs["probability"]}"
349
+ end
350
+ end
351
+ end
352
+ end
353
+ end
354
+ d_search_hit.each do |to_drop|
355
+ to_drop.drop
356
+ end
357
+ #puts "size after: " + child2.size.to_s
358
+ if child2.size == 0
359
+ d_search_result << child2
360
+ end
361
+ end
362
+ end
363
+ d_search_result.each do |to_drop|
364
+ to_drop.drop
365
+ end
366
+ end
367
+ end
368
+
369
+ File.open(outfile, "w") do |fh|
370
+ fh.print root.to_s
371
+ end
372
+ end
373
+ end # Pep::Parser
374
+
375
+
376
+ # Class for parsing the '*-prot.xml' files in different ways
377
+ class Proph::Prot::Parser < Parser
378
+
379
+ attr_accessor :prots
380
+ attr_writer :peps
381
+
382
+ def initialize
383
+ @prots = []
384
+ end
385
+
386
+ # returns all the peptides from prots
387
+ def peps
388
+ unless @peps
389
+ @peps = []
390
+ @prots.each do |prot|
391
+ @peps.push(*(prot.peps))
392
+ end
393
+ end
394
+ @peps
395
+ end
396
+
397
+
398
+ # sets and returns an array of Prot objects
399
+ # parse_type = "rexml" | "regex"
400
+ def get_prots_and_peps(protxmlfile, prot_prob_cutoff=1.0, pep_init_prob_cutoff=1.0, pep_nsp_prob_cutoff=1.0, parse_type="rexml")
401
+ ## ensure these are all floats
402
+ (prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff) = [prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff].collect do |cutoff|
403
+ cutoff.to_f
404
+ end
405
+
406
+ case parse_type
407
+ when "rexml"
408
+ doc = REXML::Document.new File.new(protxmlfile)
409
+ doc.elements.each("protein_summary/protein_group/protein") do |elem|
410
+ if elem.attributes['probability'].to_f >= prot_prob_cutoff
411
+ prob = elem.attributes['probability'].to_f
412
+ name= elem.attributes['protein_name']
413
+ curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
414
+ peptides = []
415
+ elem.elements.to_a('peptide').each do |pep|
416
+ if pep.attributes['nsp_adjusted_probability'].to_f >= pep_nsp_prob_cutoff && pep.attributes['initial_probability'].to_f >= pep_init_prob_cutoff
417
+ nsp_prob = pep.attributes['nsp_adjusted_probability'].to_f
418
+ sequence = pep.attributes['peptide_sequence']
419
+ charge = pep.attributes['charge']
420
+ pnm = pep.attributes['precursor_neutral_mass']
421
+ peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :precursor_neutral_mass => pnm, :nsp_cutoff => pep_nsp_prob_cutoff))
422
+ end
423
+ ## Only take proteins with peptides!
424
+ if peptides.size > 0
425
+ curr_prot.peps = peptides
426
+ @prots << curr_prot
427
+ end
428
+ end
429
+ end
430
+ end
431
+ when "regex"
432
+ prot_regex = /<protein protein_name="(.*)?" n_indistinguishable_proteins(.*)/o
433
+ prot_prob_regex = /probability="([\d\.]+)"/o
434
+ pep_regex = /<peptide peptide_sequence="(\w+)?"(.*)/o
435
+ pep_else_regex = /charge="(\d)" initial_probability="([\d\.]+)" nsp_adjusted_probability="([\d\.]+)"/o
436
+
437
+ curr_prot = nil
438
+ peptides = []
439
+ File.open(protxmlfile).each do |line|
440
+ if line =~ prot_regex
441
+ prob = nil
442
+ name = $1.dup
443
+ rest = $2
444
+ if rest =~ prot_prob_regex
445
+ prob = $1.dup
446
+ end
447
+ if curr_prot
448
+ if curr_prot.probability.to_f >= prot_prob_cutoff
449
+ if peptides.size > 0
450
+ curr_prot.peps = peptides
451
+ @prots.push(curr_prot)
452
+ end
453
+ end
454
+ end
455
+ curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
456
+ peptides = []
457
+ end
458
+ if line =~ pep_regex
459
+ sequence = $1.dup
460
+ rest = $2
461
+ if rest =~ pep_else_regex
462
+ charge = $1
463
+ init_prob = $2
464
+ nsp_prob = $3
465
+ if nsp_prob.to_f >= pep_nsp_prob_cutoff && init_prob.to_f >= pep_init_prob_cutoff
466
+ peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :nsp_cutoff => pep_nsp_prob_cutoff))
467
+ end
468
+ end
469
+ end
470
+ # get the last one:
471
+ if curr_prot && curr_prot.probability.to_f > prot_prob_cutoff && peptides.size > 0
472
+ curr_prot.peps = peptides
473
+ @prots.push(curr_prot)
474
+ end
475
+ end
476
+ end
477
+ @prots
478
+ end
479
+
480
+ end # Prot::Parser
481
+
482
+ ################ --END
483
+
484
+ =end
@@ -0,0 +1,166 @@
1
+ require 'merge'
2
+ module Ms ; end
3
+ module Ms::Ident ; end
4
+ class Ms::Ident::Pepxml ; end
5
+
6
# The enzyme used to digest a sample, written to and read from a pepXML
# sample_enzyme element with a nested specificity element.
class Ms::Ident::Pepxml::SampleEnzyme
  include Merge
  # an identifier
  attr_accessor :name
  # amino acids after which to cleave
  attr_accessor :cut
  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
  attr_accessor :no_cut
  # 'C' or 'N'
  attr_accessor :sense

  # Can pass in a name of an enzyme that is recognized (meaning there is a
  # public set_<name> method), currently:
  #   trypsin
  # For other enzymes, pass a hash (handed to merge!) to set :cut, :no_cut,
  # :name, and :sense.
  #
  # Raises ArgumentError when a String names an enzyme with no public
  # set_<name> preset. Dispatch is limited to public methods so that a name
  # can never reach private methods such as Kernel#set_trace_func.
  def initialize(arg={})
    if arg.is_a?(String)
      @name = arg
      preset = "set_#{@name}"
      unless respond_to?(preset) # respond_to? only sees public methods
        raise ArgumentError, "unrecognized enzyme name: #{arg.inspect}"
      end
      send(preset)
    else
      merge!(arg)
    end
  end

  # Preset: C-terminal cleavage after K or R, unless followed by P.
  def set_trypsin
    @sense = 'C'
    @cut = 'KR'
    @no_cut = 'P'
  end

  # if an xml builder object is given, it adds to the object and returns the
  # builder object, otherwise it returns an xml fragment string
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # distinct block parameter name: avoids shadowing the outer local
    xmlb.sample_enzyme(:name => name) do |enzyme_xml|
      enzyme_xml.specificity(:cut => cut, :no_cut => no_cut, :sense => sense)
    end
    builder || xmlb.doc.root.to_xml
  end

  # Fills name from +node+ and cut/no_cut/sense from the node's first
  # *element* child. Text children are skipped because in indented XML the
  # first child is a whitespace text node, which carries no attributes.
  # If the node has no element child, cut/no_cut/sense are left unchanged.
  # returns self
  def from_pepxml_node(node)
    self.name = node['name']
    spec = node.children.find {|child| child.element? }
    if spec
      self.cut = spec['cut']
      self.no_cut = spec['no_cut']
      self.sense = spec['sense']
    end
    self
  end

  # Builds a new SampleEnzyme from a pepXML node (see #from_pepxml_node).
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end
end
60
+
61
+ ###################################################
62
+ ###################################################
63
+ ###################################################
64
+ ###################################################
65
+ # This is digestion methodology:
66
+
67
+ =begin
68
+
69
+ require 'strscan'
70
+
71
+ # takes an amino acid sequence (e.g., -.PEPTIDK.L)
72
+ # returns the number of missed cleavages
73
+ def num_missed_cleavages(aaseq)
74
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
75
+ @num_missed_cleavages_regex =
76
+ if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
77
+ else
78
+ regex_string = "[#{@cut}]"
79
+ if @no_cut and @no_cut != ''
80
+ regex_string << "[^#{@no_cut}]"
81
+ end
82
+ /#{regex_string}/
83
+ end
84
+ arr = aaseq.scan(@num_missed_cleavages_regex)
85
+ num = arr.size
86
+ if aaseq[-1,1] =~ @num_missed_cleavages_regex
87
+ num -= 1
88
+ end
89
+ num
90
+ end
91
+
92
+ # requires full sequence (with heads and tails)
93
+ def num_tol_term(sequence)
94
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
95
+ no_cut = @no_cut || ''
96
+ num_tol = 0
97
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
98
+ last_of_middle = middle[-1,1]
99
+ first_of_middle = middle[0,1]
100
+ if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
101
+ num_tol += 1
102
+ end
103
+ if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
104
+ num_tol += 1
105
+ end
106
+ num_tol
107
+ end
108
+
109
+ # returns all peptides of missed cleavages <= 'missed_cleavages'
110
+ # so 2 missed cleavages will return all no missed cleavage peptides
111
+ # all 1 missed cleavages and all 2 missed cleavages.
112
+ # options:
113
+ def digest(string, missed_cleavages=0, options={})
114
+ raise NotImplementedError if @sense == 'N'
115
+ s = StringScanner.new(string)
116
+ no_cut_regex = Regexp.new("[#{@no_cut}]")
117
+ regex = Regexp.new("[#{@cut}]")
118
+ peps = []
119
+ last_pos = 0
120
+ current_pep = ''
121
+ loop do
122
+ if s.eos?
123
+ break
124
+ end
125
+ m = s.scan_until(regex)
126
+ if m ## found a cut point
127
+ last_pos = s.pos
128
+ # is the next amino acid a no_cut?
129
+ if string[s.pos,1] =~ no_cut_regex
130
+ current_pep << m
131
+ else
132
+ # cut it
133
+ current_pep << m
134
+ peps << current_pep
135
+ current_pep = ''
136
+ end
137
+ else ## didn't find a cut point
138
+ current_pep << string[last_pos..-1]
139
+ peps << current_pep
140
+ break
141
+ end
142
+ end
143
+ ## LOOP through and grab each set of missed cleavages from num down to 0
144
+ all_sets_of_peps = []
145
+ (0..missed_cleavages).to_a.reverse.each do |num_mc|
146
+ all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
147
+ end
148
+ all_sets_of_peps
149
+ end
150
+
151
+ # takes an array of peptides and returns an array containing 'num' missed
152
+ # cleavages
153
+ # DOES NOT contain peptides that contain < num of missed cleavages
154
+ # (i.e., will not return missed cleaveages of 1 or 2 if num == 3
155
+ def get_missed_cleavages(ar_of_peptide_seqs, num)
156
+ (0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
157
+ ar_of_peptide_seqs[i,num+1].join
158
+ end
159
+ end
160
+
161
+ def self.tryptic(string, missed_cleavages=0)
162
+ self.new("trypsin").digest(string, missed_cleavages)
163
+ end
164
+
165
+ end
166
+ =end
@@ -0,0 +1,42 @@
1
+ require 'merge'
2
+ module Ms ; end
3
+ module Ms::Ident ; end
4
+
5
class Ms::Ident::Pepxml
  # The sequence database that was searched, written out as a pepXML
  # search_database element.
  class SearchDatabase
    include Merge
    # required! the local, full path to the protein sequence database
    attr_accessor :local_path
    # required! 'AA' or 'NA'
    attr_accessor :seq_type

    # optional
    attr_accessor :database_name
    # optional
    attr_accessor :orig_database_url
    # optional
    attr_accessor :database_release_date
    # optional
    attr_accessor :database_release_identifier
    # optional
    attr_accessor :size_of_residues

    # takes a hash to fill in values
    #
    # get_size_of_residues:: when true and local_path names an existing file,
    #                        size_of_residues is set to the summed sequence
    #                        length of every entry yielded by Ms::Fasta.foreach.
    #                        A missing (nil) local_path is simply skipped
    #                        rather than raising a TypeError from File.exist?.
    def initialize(hash={}, get_size_of_residues=false)
      merge!(hash)
      if get_size_of_residues && @local_path && File.exist?(@local_path)
        @size_of_residues = 0
        Ms::Fasta.foreach(@local_path) do |entry|
          @size_of_residues += entry.sequence.size
        end
      end
    end

    # Adds a search_database element to +builder+ carrying every attribute
    # that currently has a value (nil/false ones are omitted).
    # returns the builder
    def to_xml(builder)
      attr_names = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues]
      attrs = {}
      attr_names.each do |key|
        val = send(key)
        attrs[key] = val if val
      end
      builder.search_database(attrs)
      builder
    end
  end

end