mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -285,47 +285,6 @@ class Spec::MzXML::Parser
285
285
  # in progress
286
286
  end
287
287
 
288
- # first, converts backslash to forward slash in filename.
289
- # if .mzXML returns the filename
290
- # if .raw or .RAW converts the file to .mZXML and returns mzXML filename
291
- # if no recognized extension, looks for .mzXML file, then .RAW file (and
292
- # converts)
293
- # aborts if file was not able to be converted
294
- def file_to_mzxml(file)
295
- file.gsub!("\\",'/')
296
- old_file = file.dup
297
- if file =~ /\.mzXML$/
298
- return file
299
- elsif file =~ /(\.RAW)|(\.raw)$/
300
- old_file = file.dup
301
- ## t2x outputs in cwd (so go to the directory of the file!)
302
- dir = File.dirname(file)
303
- basename = File.basename(file)
304
- Dir.chdir(dir) do
305
- cmd = "#{Spec::MzXML::MZXML_CONVERTER} #{basename}"
306
- puts cmd
307
- puts `#{cmd}`
308
- end
309
- file.sub!(/\.RAW$/, '.mzXML')
310
- file.sub!(/\.raw$/, '.mzXML')
311
- unless File.exist? file
312
- abort "Couldn't convert #{old_file} to #{file}"
313
- end
314
- return file
315
- else
316
- if File.exist?( file + '.mzXML' )
317
- return file_to_mzxml(file + '.mzXML')
318
- elsif File.exist?( file + '.RAW' )
319
- return file_to_mzxml(file + '.RAW')
320
- elsif File.exist?( file + '.raw' )
321
- return file_to_mzxml(file + '.raw')
322
- else
323
- return nil
324
- end
325
- end
326
-
327
- end
328
-
329
288
  def get_prec_mz_by_scan_for_time_index(file)
330
289
  index = Spec::MSRunIndex.new(file)
331
290
  prec_mz_by_scan = index.scans_by_num.collect do |scan|
@@ -356,7 +315,7 @@ class Spec::MzXML::Parser
356
315
  return get_prec_mz_by_scan_for_time_index(file)
357
316
  end
358
317
 
359
- file = file_to_mzxml(file)
318
+ file = Spec::MzXML.file_to_mzxml(file)
360
319
 
361
320
  unless parse_type then parse_type = default_parser end
362
321
  case parse_type
@@ -386,7 +345,7 @@ class Spec::MzXML::Parser
386
345
  # startMz start_mz
387
346
  # endMz end_mz
388
347
  def basic_info(mzxml_file)
389
- puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}"
348
+ puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
390
349
  hash = {}
391
350
  scan_count_tmp = []
392
351
  (1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
data/lib/spec/mzxml.rb CHANGED
@@ -4,14 +4,62 @@ require 'base64'
4
4
  module Spec; end
5
5
 
6
6
  module Spec::MzXML
7
-
8
- MZXML_CONVERTER = 't2x'
7
+ Potential_mzxml_converters = %w(readw.exe readw t2x)
9
8
 
10
9
  # takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
11
10
  def strip_time(time)
12
11
  return time[2...-1]
13
12
  end
14
13
 
14
+ # first, converts backslash to forward slash in filename.
15
+ # if .mzXML returns the filename
16
+ # if .raw or .RAW, converts the file to .mzXML and returns the mzXML filename
17
+ # if no recognized extension, looks for .mzXML file, then .RAW file (and
18
+ # converts)
19
+ # aborts if file was not able to be converted
20
+ # returns nil if a file that can be converted or used was not found
21
+ def self.file_to_mzxml(file)
22
+ file.gsub!("\\",'/')
23
+ old_file = file.dup
24
+ if file =~ /\.mzXML$/
25
+ return file
26
+ elsif file =~ /\.RAW$/i
27
+ old_file = file.dup
28
+ ## t2x outputs in cwd (so go to the directory of the file!)
29
+ dir = File.dirname(file)
30
+ basename = File.basename(file)
31
+ converter = Spec::MzXML.find_mzxml_converter
32
+ Dir.chdir(dir) do
33
+ if converter =~ /readw/
34
+ cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
35
+ else
36
+ cmd = "#{converter} #{basename}"
37
+ end
38
+ #puts cmd
39
+ #puts `#{cmd}`
40
+ reply = `#{cmd}`
41
+ puts reply if $VERBOSE
42
+ end
43
+ file.sub!(/\.RAW$/i, '.mzXML')
44
+ unless File.exist? file
45
+ abort "Couldn't convert #{old_file} to #{file}"
46
+ end
47
+ return file
48
+ else
49
+ if File.exist?( file + '.mzXML' )
50
+ return file_to_mzxml(file + '.mzXML')
51
+ elsif File.exist?( file + '.RAW' )
52
+ return file_to_mzxml(file + '.RAW')
53
+ elsif File.exist?( file + '.raw' )
54
+ return file_to_mzxml(file + '.raw')
55
+ else
56
+ return nil
57
+ end
58
+ end
59
+
60
+ end
61
+
62
+
15
63
 
16
64
  # takes a base64 peaks string and returns an array of [m/z,intens] doublets
17
65
  # mzXML as network ordered
@@ -42,5 +90,20 @@ module Spec::MzXML
42
90
  b64d.unpack(unpack_code)
43
91
  end
44
92
 
93
+ # Searches each PATH directory and returns the first converter it finds
94
+ # returns nil if none found
95
+ def self.find_mzxml_converter
96
+ ENV['PATH'].split(/[:;]/).each do |path|
97
+ Dir.chdir(path) do
98
+ Potential_mzxml_converters.each do |pc|
99
+ if File.exist? pc
100
+ return File.join(path, pc)
101
+ end
102
+ end
103
+ end
104
+ end
105
+ nil
106
+ end
107
+
45
108
 
46
109
  end
@@ -1,5 +1,7 @@
1
1
  require 'fasta'
2
2
 
3
+ module SpecID ; end
4
+
3
5
  class SpecID::AAFreqs
4
6
  # a fasta object
5
7
  attr_accessor :fasta
@@ -77,16 +79,17 @@ class SpecID::AAFreqs
77
79
  end
78
80
 
79
81
  # pep_objs must respond to aaseq
82
+ # also takes a hash of peptides keyed on :aaseq
80
83
  def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
81
- @aafreqs ||= {}
82
- @aafreqs[:C] = cyst_freq
83
- seqs = pep_objs.map do |v|
84
- if v.sequence =~ /\.([\w\*]+)\./
85
- $1
86
- else
87
- abort v.sequence.to_s + " could not be matched!"
84
+ if pep_objs.is_a? Hash
85
+ seqs = pep_objs.keys
86
+ else
87
+ seqs = pep_objs.map do |v|
88
+ v.aaseq
88
89
  end
89
90
  end
91
+ @aafreqs ||= {}
92
+ @aafreqs[:C] = cyst_freq
90
93
  actual_and_expected_number(seqs, :C, 1)
91
94
  end
92
95
 
@@ -6,13 +6,17 @@ require 'spec_id'
6
6
  require 'zlib'
7
7
  require 'hash_by'
8
8
  require 'set_from_hash'
9
+ require 'array_class'
9
10
 
10
11
  ## have to pre-declare some guys
11
- class SpecID; end
12
- class SpecID::Prot; end
12
+ module SpecID; end
13
+ module SpecID::Prot; end
14
+ module SpecID::Pep; end
13
15
  module SpecIDXML; end
14
16
 
15
- class SpecID::Bioworks
17
+ class Bioworks
18
+ include SpecID
19
+
16
20
  # Regular expressions
17
21
  @@bioworksinfo_re = /<bioworksinfo>(.*)<\/bioworksinfo>/o
18
22
  @@modifications_re = /<modifications>(.*)<\/modifications>/o
@@ -21,10 +25,9 @@ class SpecID::Bioworks
21
25
  @@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
22
26
 
23
27
 
24
- attr_accessor :prots, :version, :global_filename, :origfilename, :origfilepath
28
+ attr_accessor :peps, :prots, :version, :global_filename, :origfilename, :origfilepath
25
29
  # a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
26
30
  attr_accessor :modifications
27
- attr_writer :peps
28
31
 
29
32
  def hi_prob_best ; false end
30
33
 
@@ -127,6 +130,7 @@ class SpecID::Bioworks
127
130
  # note that each pep will contain its original prot it belongs to, even
128
131
  # though the parallel protein actually represents the proteins it belongs
129
132
  # to.
133
+ # assumes that each peptide points to all its proteins in pep.prots
130
134
  def _uniq_peps_by_sequence_charge(peps)
131
135
  new_arr = []
132
136
  prot_arr = []
@@ -134,11 +138,11 @@ class SpecID::Bioworks
134
138
  (0...peps.size).each do |i|
135
139
  next if index_accounted_for.include?(i)
136
140
  new_arr << peps[i]
137
- prot_arr.push( [peps[i].prot] )
141
+ prot_arr.push( peps[i].prots )
138
142
  ((i+1)...peps.size).each do |j|
139
143
  pep1, pep2 = peps[i], peps[j]
140
144
  if pep1.sequence == pep2.sequence && pep1.charge == pep2.charge
141
- prot_arr.last.push pep2.prot
145
+ prot_arr.last.push( *(pep2.prots) )
142
146
  index_accounted_for << j
143
147
  end
144
148
  end
@@ -149,13 +153,14 @@ class SpecID::Bioworks
149
153
  def initialize(file=nil)
150
154
  @peps = nil
151
155
  if file
156
+ @filename = file
152
157
  parse_xml(file)
153
158
  #parse_xml_by_xmlparser(file)
154
159
  end
155
160
  end
156
161
 
157
162
  def parse_xml_by_xmlparser(file)
158
- parser = SpecID::Bioworks::XMLParser.new
163
+ parser = Bioworks::XMLParser.new
159
164
  File.open(file) do |fh|
160
165
  #3.times do fh.gets end ## TEMPFIX
161
166
  parser.parse(fh)
@@ -165,23 +170,6 @@ class SpecID::Bioworks
165
170
  @prots = parser.prots
166
171
  end
167
172
 
168
-
169
- # Returns the list of all peptide hits. A given sequence/charge or scan
170
- # may be redundant!
171
- def peps
172
- if @peps
173
- return @peps
174
- else
175
- @peps = []
176
- prots.each do |prot|
177
- prot.peps.each do |pep|
178
- @peps << pep
179
- end
180
- end
181
- return @peps
182
- end
183
- end
184
-
185
173
  # This is highly specific to Bioworks 3.2 xml export. In other words,
186
174
  # unless the newlines, etc. are duplicated, this parser will fail! Not
187
175
  # robust, but it is faster than xmlparser (which is based on the speedy
@@ -200,21 +188,23 @@ class SpecID::Bioworks
200
188
  end
201
189
  @version = get_regex_val(fh, @@bioworksinfo_re)
202
190
  @modifications = get_regex_val(fh, @@modifications_re)
203
- @prots = get_prots(fh, self)
191
+ @prots, @peps = get_prots_from_xml_stream(fh)
204
192
  fh.close
205
193
  end
206
194
 
207
- def get_prots(fh, bioworks)
195
+ ## returns proteins and peptides
196
+ def get_prots_from_xml_stream(fh)
197
+ uniq_pephit_hash = {}
208
198
  prots = []
209
199
  while line = fh.gets
210
200
  if line =~ @@protein_re
211
- prot = SpecID::Bioworks::Prot.new
201
+ prot = Bioworks::Prot.new
212
202
  prot.bioworks = self
213
- prot.set_from_xml_stream(fh, bioworks)
203
+ prot.set_from_xml_stream(fh, uniq_pephit_hash)
214
204
  prots << prot
215
205
  end
216
206
  end
217
- prots
207
+ [prots, uniq_pephit_hash.values]
218
208
  end
219
209
 
220
210
  # gets the regex and stops (and rewinds if it hits a protein)
@@ -246,7 +236,7 @@ end
246
236
 
247
237
  # Implements fast parsing via XMLParser (wrapper around Expat)
248
238
  # It is actually slower (about 25% slower) than regular expression parsing
249
- class SpecID::Bioworks::XMLParser < XMLParser
239
+ class Bioworks::XMLParser < XMLParser
250
240
  @@at = '@'
251
241
  attr_accessor :prots
252
242
 
@@ -262,18 +252,18 @@ class SpecID::Bioworks::XMLParser < XMLParser
262
252
  case name
263
253
  when "peptide"
264
254
  curr_prot = @current_obj
265
- if @current_obj.class == SpecID::Bioworks::Prot
255
+ if @current_obj.class == Bioworks::Prot
266
256
  @current_obj.set_from_xml_hash_xmlparser(@current_hash)
267
257
  else
268
258
  curr_prot = @current_obj.prot ## unless previous was a peptide
269
259
  end
270
- peptide = SpecID::Bioworks::Pep.new
260
+ peptide = Bioworks::Pep.new
271
261
  peptide.prot = curr_prot
272
262
  curr_prot.peps << peptide
273
263
  @current_obj = peptide
274
264
  @current_hash = {}
275
265
  when "protein"
276
- @current_obj = SpecID::Bioworks::Prot.new
266
+ @current_obj = Bioworks::Prot.new
277
267
  @current_hash = {}
278
268
  @prots << @current_obj
279
269
  else
@@ -297,13 +287,14 @@ class SpecID::Bioworks::XMLParser < XMLParser
297
287
 
298
288
  end
299
289
 
300
- module SpecID::Bioworks::XML
290
+ module Bioworks::XML
301
291
  # The regular expression to grab attributes from the bioworks xml format
302
292
  @@att_re = /<([\w]+)>(.*)<\/[\w]+>/o
303
293
  end
304
294
 
305
- class SpecID::Bioworks::Prot < SpecID::Prot
306
- include SpecID::Bioworks::XML
295
+ class Bioworks::Prot
296
+ include SpecID::Prot
297
+ include Bioworks::XML
307
298
 
308
299
  @@end_prot_re = /<\/protein>/o
309
300
  @@pep_re = /<peptide>/o
@@ -323,15 +314,32 @@ class SpecID::Bioworks::Prot < SpecID::Prot
323
314
  end
324
315
  end
325
316
 
326
- def set_from_xml_stream(fh, bioworks)
317
+ def set_from_xml_stream(fh, uniq_pephit_hash)
327
318
  hash = {}
319
+ @peps = []
328
320
  while line = fh.gets
329
321
  if line =~ @@att_re
330
322
  hash[$1] = $2
331
323
  elsif line =~ @@pep_re
332
- pep = SpecID::Bioworks::Pep.new.set_from_xml_stream(fh, self)
333
- pep.prot = self
324
+ ## Could do a look ahead to grab the file and sequence to check
325
+ ## uniqueness to increase speed here.
326
+ pep = Bioworks::Pep.new.set_from_xml_stream(fh)
327
+ # normal search results files have a global filename
328
+ # while multi-consensus do not
329
+ pep[12] ||= bioworks.global_filename
330
+
331
+ ## figure out uniqueness
332
+ ky = [pep.base_name, pep.first_scan, pep.charge, pep.sequence]
333
+ if uniq_pephit_hash.key? ky
334
+ pep = uniq_pephit_hash[ky]
335
+ else
336
+ ## insert the new protein
337
+ pep.prots = []
338
+ uniq_pephit_hash[ky] = pep
339
+ end
340
+ pep.prots << self
334
341
  @peps << pep
342
+
335
343
  elsif line =~ @@end_prot_re
336
344
  set_from_xml_hash(hash)
337
345
  break
@@ -367,9 +375,12 @@ class SpecID::Bioworks::Prot < SpecID::Prot
367
375
  end
368
376
  end
369
377
 
378
+ Bioworks::Pep = ArrayClass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
379
+ # 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
370
380
 
371
- class SpecID::Bioworks::Pep < Array
372
- include SpecID::Bioworks::XML
381
+ class Bioworks::Pep
382
+ include SpecID::Pep
383
+ include Bioworks::XML
373
384
  include SpecIDXML
374
385
 
375
386
  @@file_split_first_re = /, /o
@@ -380,53 +391,18 @@ class SpecID::Bioworks::Pep < Array
380
391
  @@file_mult_scan_re = /(.*), (\d+) - (\d+)/o
381
392
  ## NOTE! the mass is really the theoretical MH+!!!!
382
393
  ## NOTE! ALL values stored as strings, except peptide_probability!
383
- ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
384
- ind_keys = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prot => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18 }
385
-
386
-
387
- def sequence ; self[0] end ; def sequence=(oth) ; self[0] = oth end
388
- def mass ; self[1] end ; def mass=(oth) ; self[1] = oth end
389
- def deltamass ; self[2] end ; def deltamass=(oth) ; self[2] = oth end
390
- def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
391
- def xcorr ; self[4] end ; def xcorr=(oth) ; self[4] = oth end
392
- def deltacn ; self[5] end ; def deltacn=(oth) ; self[5] = oth end
393
- def sp ; self[6] end ; def sp=(oth) ; self[6] = oth end
394
- def rsp ; self[7] end ; def rsp=(oth) ; self[7] = oth end
395
- def ions ; self[8] end ; def ions=(oth) ; self[8] = oth end
396
- def count ; self[9] end ; def count=(oth) ; self[9] = oth end
397
- def tic ; self[10] end ; def tic=(oth) ; self[10] = oth end
398
- def prot ; self[11] end ; def prot=(oth) ; self[11] = oth end
399
- def base_name ; self[12] end ; def base_name=(oth) ; self[12] = oth end
400
- def first_scan ; self[13] end ; def first_scan=(oth) ; self[13] = oth end
401
- def last_scan ; self[14] end ; def last_scan=(oth) ; self[14] = oth end
402
- def peptide_probability ; self[15] end ; def peptide_probability=(oth) ; self[15] = oth end
403
- def file ; self[16] end # we define a writer below
404
- def _num_prots ; self[17] end ; def _num_prots=(oth) ; self[17] = oth end
405
- def _first_prot ; self[18] end ; def _first_prot=(oth) ; self[18] = oth end
406
394
 
407
395
  ## other accessors:
408
396
  def probability ; self[15] end
397
+ def mh ; self[1] end
409
398
 
410
- #ind_keys.keys do |k|
411
- # self.module_eval( "def #{k} ; self[#{ind_keys[k]}] end ; def #{k}=(oth) ; self[#{ind_keys[k]} = oth end ", __FILE__, __LINE__ )
412
- #end
413
- @@arr_size = ind_keys.size
414
- ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
415
- ind_keys.merge!(ind_keys_w_eq)
416
- ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
417
-
418
- def initialize(args=nil)
419
- super(@@arr_size.size)
420
- if args
421
- if args.is_a? Hash
422
- args.each do |k,v|
423
- self[@@ind[k]] = v
424
- end
425
- end
426
- end
399
+ # This is not a true ppm since it should be divided by the actual mh instead
400
+ # of the theoretical (but it is as close as we can get for this object)
401
+ def ppm
402
+ 1.0e6 * (self[2].abs/self[1])
403
+ #1.0e6 * (self.deltamass.abs/self.mh)
427
404
  end
428
405
 
429
-
430
406
  # returns array of values of the attributes given (as symbols)
431
407
  def get(*args)
432
408
  args.collect do |arg|
@@ -463,25 +439,30 @@ class SpecID::Bioworks::Pep < Array
463
439
  [base_name, first_scan, last_scan]
464
440
  end
465
441
 
442
+ tmp_verb = $VERBOSE
443
+ $VERBOSE = nil
466
444
  def file=(arg)
467
445
  ## Set these vals by index:
468
446
  #puts "AERRG: #{arg}"
469
447
  self[16] = arg
470
448
  self[12,3] = self.class.extract_file_info(arg)
471
449
  end
450
+ $VERBOSE = tmp_verb
472
451
 
473
452
  def inspect
474
- "<SpecID::Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn} prot: #{prot} base_name: #{base_name} first_scan: #{first_scan} last_scan: #{last_scan} file: #{file} peptide_probability: #{peptide_probability}>"
453
+ "<Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn}, prots(count):#{prots.size}, base_name: #{base_name}, first_scan: #{first_scan}, last_scan: #{last_scan}, file: #{file}, peptide_probability: #{peptide_probability}, aaseq:#{aaseq}>"
454
+
455
+
475
456
  end
476
457
 
477
458
  def set_from_hash(hash)
478
459
  self[0,11] = [hash["sequence"], hash["mass"], hash["deltamass"], hash["charge"], hash["xcorr"], hash["deltacn"], hash["sp"], hash["rsp"], hash["ions"], hash["count"], hash["tic"]]
479
460
  self.file = hash["file"]
480
461
  self[15] = hash["peptide_probability"].to_f
462
+ self[19] = SpecID::Pep.sequence_to_aaseq(self[0]) ## aaseq
481
463
  end
482
464
 
483
- def set_from_xml_stream(fh, prot)
484
- self[11] = prot
465
+ def set_from_xml_stream(fh)
485
466
  hash = {}
486
467
  while line = fh.gets
487
468
  if line =~ @@att_re
@@ -491,7 +472,6 @@ class SpecID::Bioworks::Pep < Array
491
472
  elsif line =~ @@end_pep_re
492
473
  set_from_hash(hash)
493
474
  #puts "SELF[12]: #{self[12]}"
494
- unless self[12] then self[12] = prot.bioworks.global_filename end
495
475
  #puts "SELF[12]: #{self[12]}"
496
476
  break
497
477
  else