mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
@@ -285,47 +285,6 @@ class Spec::MzXML::Parser
285
285
  # in progress
286
286
  end
287
287
 
288
- # first, converts backslash to forward slash in filename.
289
- # if .mzXML returns the filename
290
- # if .raw or .RAW converts the file to .mZXML and returns mzXML filename
291
- # if no recognized extension, looks for .mzXML file, then .RAW file (and
292
- # converts)
293
- # aborts if file was not able to be converted
294
- def file_to_mzxml(file)
295
- file.gsub!("\\",'/')
296
- old_file = file.dup
297
- if file =~ /\.mzXML$/
298
- return file
299
- elsif file =~ /(\.RAW)|(\.raw)$/
300
- old_file = file.dup
301
- ## t2x outputs in cwd (so go to the directory of the file!)
302
- dir = File.dirname(file)
303
- basename = File.basename(file)
304
- Dir.chdir(dir) do
305
- cmd = "#{Spec::MzXML::MZXML_CONVERTER} #{basename}"
306
- puts cmd
307
- puts `#{cmd}`
308
- end
309
- file.sub!(/\.RAW$/, '.mzXML')
310
- file.sub!(/\.raw$/, '.mzXML')
311
- unless File.exist? file
312
- abort "Couldn't convert #{old_file} to #{file}"
313
- end
314
- return file
315
- else
316
- if File.exist?( file + '.mzXML' )
317
- return file_to_mzxml(file + '.mzXML')
318
- elsif File.exist?( file + '.RAW' )
319
- return file_to_mzxml(file + '.RAW')
320
- elsif File.exist?( file + '.raw' )
321
- return file_to_mzxml(file + '.raw')
322
- else
323
- return nil
324
- end
325
- end
326
-
327
- end
328
-
329
288
  def get_prec_mz_by_scan_for_time_index(file)
330
289
  index = Spec::MSRunIndex.new(file)
331
290
  prec_mz_by_scan = index.scans_by_num.collect do |scan|
@@ -356,7 +315,7 @@ class Spec::MzXML::Parser
356
315
  return get_prec_mz_by_scan_for_time_index(file)
357
316
  end
358
317
 
359
- file = file_to_mzxml(file)
318
+ file = Spec::MzXML.file_to_mzxml(file)
360
319
 
361
320
  unless parse_type then parse_type = default_parser end
362
321
  case parse_type
@@ -386,7 +345,7 @@ class Spec::MzXML::Parser
386
345
  # startMz start_mz
387
346
  # endMz end_mz
388
347
  def basic_info(mzxml_file)
389
- puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}"
348
+ puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
390
349
  hash = {}
391
350
  scan_count_tmp = []
392
351
  (1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
data/lib/spec/mzxml.rb CHANGED
@@ -4,14 +4,62 @@ require 'base64'
4
4
  module Spec; end
5
5
 
6
6
  module Spec::MzXML
7
-
8
- MZXML_CONVERTER = 't2x'
7
+ Potential_mzxml_converters = %w(readw.exe readw t2x)
9
8
 
10
9
  # takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
11
10
  def strip_time(time)
12
11
  return time[2...-1]
13
12
  end
14
13
 
14
+ # first, converts backslash to forward slash in filename.
15
+ # if .mzXML returns the filename
16
+ # if .raw or .RAW converts the file to .mzXML and returns the .mzXML filename
17
+ # if no recognized extension, looks for .mzXML file, then .RAW file (and
18
+ # converts)
19
+ # aborts if file was not able to be converted
20
+ # returns nil if a file that can be converted or used was not found
21
+ def self.file_to_mzxml(file)
22
+ file.gsub!("\\",'/')
23
+ old_file = file.dup
24
+ if file =~ /\.mzXML$/
25
+ return file
26
+ elsif file =~ /\.RAW$/i
27
+ old_file = file.dup
28
+ ## t2x outputs in cwd (so go to the directory of the file!)
29
+ dir = File.dirname(file)
30
+ basename = File.basename(file)
31
+ converter = Spec::MzXML.find_mzxml_converter
32
+ Dir.chdir(dir) do
33
+ if converter =~ /readw/
34
+ cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
35
+ else
36
+ cmd = "#{converter} #{basename}"
37
+ end
38
+ #puts cmd
39
+ #puts `#{cmd}`
40
+ reply = `#{cmd}`
41
+ puts reply if $VERBOSE
42
+ end
43
+ file.sub!(/\.RAW$/i, '.mzXML')
44
+ unless File.exist? file
45
+ abort "Couldn't convert #{old_file} to #{file}"
46
+ end
47
+ return file
48
+ else
49
+ if File.exist?( file + '.mzXML' )
50
+ return file_to_mzxml(file + '.mzXML')
51
+ elsif File.exist?( file + '.RAW' )
52
+ return file_to_mzxml(file + '.RAW')
53
+ elsif File.exist?( file + '.raw' )
54
+ return file_to_mzxml(file + '.raw')
55
+ else
56
+ return nil
57
+ end
58
+ end
59
+
60
+ end
61
+
62
+
15
63
 
16
64
  # takes a base64 peaks string and returns an array of [m/z,intens] doublets
17
65
  # mzXML as network ordered
@@ -42,5 +90,20 @@ module Spec::MzXML
42
90
  b64d.unpack(unpack_code)
43
91
  end
44
92
 
93
+ # Searches each PATH element and returns the first converter it finds
94
+ # returns nil if none found
95
+ def self.find_mzxml_converter
96
+ ENV['PATH'].split(/[:;]/).each do |path|
97
+ Dir.chdir(path) do
98
+ Potential_mzxml_converters.each do |pc|
99
+ if File.exist? pc
100
+ return File.join(path, pc)
101
+ end
102
+ end
103
+ end
104
+ end
105
+ nil
106
+ end
107
+
45
108
 
46
109
  end
@@ -1,5 +1,7 @@
1
1
  require 'fasta'
2
2
 
3
+ module SpecID ; end
4
+
3
5
  class SpecID::AAFreqs
4
6
  # a fasta object
5
7
  attr_accessor :fasta
@@ -77,16 +79,17 @@ class SpecID::AAFreqs
77
79
  end
78
80
 
79
81
  # pep_objs respond to sequence?
82
+ # also takes a hash of peptides keyed on aaseq (the hash keys are used as the sequences)
80
83
  def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
81
- @aafreqs ||= {}
82
- @aafreqs[:C] = cyst_freq
83
- seqs = pep_objs.map do |v|
84
- if v.sequence =~ /\.([\w\*]+)\./
85
- $1
86
- else
87
- abort v.sequence.to_s + " could not be matched!"
84
+ if pep_objs.is_a? Hash
85
+ seqs = pep_objs.keys
86
+ else
87
+ seqs = pep_objs.map do |v|
88
+ v.aaseq
88
89
  end
89
90
  end
91
+ @aafreqs ||= {}
92
+ @aafreqs[:C] = cyst_freq
90
93
  actual_and_expected_number(seqs, :C, 1)
91
94
  end
92
95
 
@@ -6,13 +6,17 @@ require 'spec_id'
6
6
  require 'zlib'
7
7
  require 'hash_by'
8
8
  require 'set_from_hash'
9
+ require 'array_class'
9
10
 
10
11
  ## have to pre-declare some guys
11
- class SpecID; end
12
- class SpecID::Prot; end
12
+ module SpecID; end
13
+ module SpecID::Prot; end
14
+ module SpecID::Pep; end
13
15
  module SpecIDXML; end
14
16
 
15
- class SpecID::Bioworks
17
+ class Bioworks
18
+ include SpecID
19
+
16
20
  # Regular expressions
17
21
  @@bioworksinfo_re = /<bioworksinfo>(.*)<\/bioworksinfo>/o
18
22
  @@modifications_re = /<modifications>(.*)<\/modifications>/o
@@ -21,10 +25,9 @@ class SpecID::Bioworks
21
25
  @@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
22
26
 
23
27
 
24
- attr_accessor :prots, :version, :global_filename, :origfilename, :origfilepath
28
+ attr_accessor :peps, :prots, :version, :global_filename, :origfilename, :origfilepath
25
29
  # a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
26
30
  attr_accessor :modifications
27
- attr_writer :peps
28
31
 
29
32
  def hi_prob_best ; false end
30
33
 
@@ -127,6 +130,7 @@ class SpecID::Bioworks
127
130
  # note that each pep will contain its original prot it belongs to, even
128
131
  # though the parallel protein actually represents the proteins it belongs
129
132
  # to.
133
+ # assumes that each peptide points to all its proteins in pep.prots
130
134
  def _uniq_peps_by_sequence_charge(peps)
131
135
  new_arr = []
132
136
  prot_arr = []
@@ -134,11 +138,11 @@ class SpecID::Bioworks
134
138
  (0...peps.size).each do |i|
135
139
  next if index_accounted_for.include?(i)
136
140
  new_arr << peps[i]
137
- prot_arr.push( [peps[i].prot] )
141
+ prot_arr.push( peps[i].prots )
138
142
  ((i+1)...peps.size).each do |j|
139
143
  pep1, pep2 = peps[i], peps[j]
140
144
  if pep1.sequence == pep2.sequence && pep1.charge == pep2.charge
141
- prot_arr.last.push pep2.prot
145
+ prot_arr.last.push( *(pep2.prots) )
142
146
  index_accounted_for << j
143
147
  end
144
148
  end
@@ -149,13 +153,14 @@ class SpecID::Bioworks
149
153
  def initialize(file=nil)
150
154
  @peps = nil
151
155
  if file
156
+ @filename = file
152
157
  parse_xml(file)
153
158
  #parse_xml_by_xmlparser(file)
154
159
  end
155
160
  end
156
161
 
157
162
  def parse_xml_by_xmlparser(file)
158
- parser = SpecID::Bioworks::XMLParser.new
163
+ parser = Bioworks::XMLParser.new
159
164
  File.open(file) do |fh|
160
165
  #3.times do fh.gets end ## TEMPFIX
161
166
  parser.parse(fh)
@@ -165,23 +170,6 @@ class SpecID::Bioworks
165
170
  @prots = parser.prots
166
171
  end
167
172
 
168
-
169
- # Returns the list of all peptide hits. A given sequence/charge or scan
170
- # may be redundant!
171
- def peps
172
- if @peps
173
- return @peps
174
- else
175
- @peps = []
176
- prots.each do |prot|
177
- prot.peps.each do |pep|
178
- @peps << pep
179
- end
180
- end
181
- return @peps
182
- end
183
- end
184
-
185
173
  # This is highly specific to Bioworks 3.2 xml export. In other words,
186
174
  # unless the newlines, etc. are duplicated, this parser will fail! Not
187
175
  # robust, but it is faster than xmlparser (which is based on the speedy
@@ -200,21 +188,23 @@ class SpecID::Bioworks
200
188
  end
201
189
  @version = get_regex_val(fh, @@bioworksinfo_re)
202
190
  @modifications = get_regex_val(fh, @@modifications_re)
203
- @prots = get_prots(fh, self)
191
+ @prots, @peps = get_prots_from_xml_stream(fh)
204
192
  fh.close
205
193
  end
206
194
 
207
- def get_prots(fh, bioworks)
195
+ ## returns proteins and peptides
196
+ def get_prots_from_xml_stream(fh)
197
+ uniq_pephit_hash = {}
208
198
  prots = []
209
199
  while line = fh.gets
210
200
  if line =~ @@protein_re
211
- prot = SpecID::Bioworks::Prot.new
201
+ prot = Bioworks::Prot.new
212
202
  prot.bioworks = self
213
- prot.set_from_xml_stream(fh, bioworks)
203
+ prot.set_from_xml_stream(fh, uniq_pephit_hash)
214
204
  prots << prot
215
205
  end
216
206
  end
217
- prots
207
+ [prots, uniq_pephit_hash.values]
218
208
  end
219
209
 
220
210
  # gets the regex and stops (and rewinds if it hits a protein)
@@ -246,7 +236,7 @@ end
246
236
 
247
237
  # Implements fast parsing via XMLParser (wrapper around Expat)
248
238
  # It is actually slower (about 25% slower) than regular expression parsing
249
- class SpecID::Bioworks::XMLParser < XMLParser
239
+ class Bioworks::XMLParser < XMLParser
250
240
  @@at = '@'
251
241
  attr_accessor :prots
252
242
 
@@ -262,18 +252,18 @@ class SpecID::Bioworks::XMLParser < XMLParser
262
252
  case name
263
253
  when "peptide"
264
254
  curr_prot = @current_obj
265
- if @current_obj.class == SpecID::Bioworks::Prot
255
+ if @current_obj.class == Bioworks::Prot
266
256
  @current_obj.set_from_xml_hash_xmlparser(@current_hash)
267
257
  else
268
258
  curr_prot = @current_obj.prot ## unless previous was a peptide
269
259
  end
270
- peptide = SpecID::Bioworks::Pep.new
260
+ peptide = Bioworks::Pep.new
271
261
  peptide.prot = curr_prot
272
262
  curr_prot.peps << peptide
273
263
  @current_obj = peptide
274
264
  @current_hash = {}
275
265
  when "protein"
276
- @current_obj = SpecID::Bioworks::Prot.new
266
+ @current_obj = Bioworks::Prot.new
277
267
  @current_hash = {}
278
268
  @prots << @current_obj
279
269
  else
@@ -297,13 +287,14 @@ class SpecID::Bioworks::XMLParser < XMLParser
297
287
 
298
288
  end
299
289
 
300
- module SpecID::Bioworks::XML
290
+ module Bioworks::XML
301
291
  # The regular expression to grab attributes from the bioworks xml format
302
292
  @@att_re = /<([\w]+)>(.*)<\/[\w]+>/o
303
293
  end
304
294
 
305
- class SpecID::Bioworks::Prot < SpecID::Prot
306
- include SpecID::Bioworks::XML
295
+ class Bioworks::Prot
296
+ include SpecID::Prot
297
+ include Bioworks::XML
307
298
 
308
299
  @@end_prot_re = /<\/protein>/o
309
300
  @@pep_re = /<peptide>/o
@@ -323,15 +314,32 @@ class SpecID::Bioworks::Prot < SpecID::Prot
323
314
  end
324
315
  end
325
316
 
326
- def set_from_xml_stream(fh, bioworks)
317
+ def set_from_xml_stream(fh, uniq_pephit_hash)
327
318
  hash = {}
319
+ @peps = []
328
320
  while line = fh.gets
329
321
  if line =~ @@att_re
330
322
  hash[$1] = $2
331
323
  elsif line =~ @@pep_re
332
- pep = SpecID::Bioworks::Pep.new.set_from_xml_stream(fh, self)
333
- pep.prot = self
324
+ ## Could do a look ahead to grab the file and sequence to check
325
+ ## uniqueness to increase speed here.
326
+ pep = Bioworks::Pep.new.set_from_xml_stream(fh)
327
+ # normal search results files have a global filename
328
+ # while multi-consensus do not
329
+ pep[12] ||= bioworks.global_filename
330
+
331
+ ## figure out uniqueness
332
+ ky = [pep.base_name, pep.first_scan, pep.charge, pep.sequence]
333
+ if uniq_pephit_hash.key? ky
334
+ pep = uniq_pephit_hash[ky]
335
+ else
336
+ ## insert the new protein
337
+ pep.prots = []
338
+ uniq_pephit_hash[ky] = pep
339
+ end
340
+ pep.prots << self
334
341
  @peps << pep
342
+
335
343
  elsif line =~ @@end_prot_re
336
344
  set_from_xml_hash(hash)
337
345
  break
@@ -367,9 +375,12 @@ class SpecID::Bioworks::Prot < SpecID::Prot
367
375
  end
368
376
  end
369
377
 
378
+ Bioworks::Pep = ArrayClass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
379
+ # 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
370
380
 
371
- class SpecID::Bioworks::Pep < Array
372
- include SpecID::Bioworks::XML
381
+ class Bioworks::Pep
382
+ include SpecID::Pep
383
+ include Bioworks::XML
373
384
  include SpecIDXML
374
385
 
375
386
  @@file_split_first_re = /, /o
@@ -380,53 +391,18 @@ class SpecID::Bioworks::Pep < Array
380
391
  @@file_mult_scan_re = /(.*), (\d+) - (\d+)/o
381
392
  ## NOTE! the mass is really the theoretical MH+!!!!
382
393
  ## NOTE! ALL values stored as strings, except peptide_probability!
383
- ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
384
- ind_keys = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prot => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18 }
385
-
386
-
387
- def sequence ; self[0] end ; def sequence=(oth) ; self[0] = oth end
388
- def mass ; self[1] end ; def mass=(oth) ; self[1] = oth end
389
- def deltamass ; self[2] end ; def deltamass=(oth) ; self[2] = oth end
390
- def charge ; self[3] end ; def charge=(oth) ; self[3] = oth end
391
- def xcorr ; self[4] end ; def xcorr=(oth) ; self[4] = oth end
392
- def deltacn ; self[5] end ; def deltacn=(oth) ; self[5] = oth end
393
- def sp ; self[6] end ; def sp=(oth) ; self[6] = oth end
394
- def rsp ; self[7] end ; def rsp=(oth) ; self[7] = oth end
395
- def ions ; self[8] end ; def ions=(oth) ; self[8] = oth end
396
- def count ; self[9] end ; def count=(oth) ; self[9] = oth end
397
- def tic ; self[10] end ; def tic=(oth) ; self[10] = oth end
398
- def prot ; self[11] end ; def prot=(oth) ; self[11] = oth end
399
- def base_name ; self[12] end ; def base_name=(oth) ; self[12] = oth end
400
- def first_scan ; self[13] end ; def first_scan=(oth) ; self[13] = oth end
401
- def last_scan ; self[14] end ; def last_scan=(oth) ; self[14] = oth end
402
- def peptide_probability ; self[15] end ; def peptide_probability=(oth) ; self[15] = oth end
403
- def file ; self[16] end # we define a writer below
404
- def _num_prots ; self[17] end ; def _num_prots=(oth) ; self[17] = oth end
405
- def _first_prot ; self[18] end ; def _first_prot=(oth) ; self[18] = oth end
406
394
 
407
395
  ## other accessors:
408
396
  def probability ; self[15] end
397
+ def mh ; self[1] end
409
398
 
410
- #ind_keys.keys do |k|
411
- # self.module_eval( "def #{k} ; self[#{ind_keys[k]}] end ; def #{k}=(oth) ; self[#{ind_keys[k]} = oth end ", __FILE__, __LINE__ )
412
- #end
413
- @@arr_size = ind_keys.size
414
- ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
415
- ind_keys.merge!(ind_keys_w_eq)
416
- ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
417
-
418
- def initialize(args=nil)
419
- super(@@arr_size.size)
420
- if args
421
- if args.is_a? Hash
422
- args.each do |k,v|
423
- self[@@ind[k]] = v
424
- end
425
- end
426
- end
399
+ # This is not a true ppm since it should be divided by the actual mh instead
400
+ # of the theoretical (but it is as close as we can get for this object)
401
+ def ppm
402
+ 1.0e6 * (self[2].abs/self[1])
403
+ #1.0e6 * (self.deltamass.abs/self.mh)
427
404
  end
428
405
 
429
-
430
406
  # returns array of values of the attributes given (as symbols)
431
407
  def get(*args)
432
408
  args.collect do |arg|
@@ -463,25 +439,30 @@ class SpecID::Bioworks::Pep < Array
463
439
  [base_name, first_scan, last_scan]
464
440
  end
465
441
 
442
+ tmp_verb = $VERBOSE
443
+ $VERBOSE = nil
466
444
  def file=(arg)
467
445
  ## Set these vals by index:
468
446
  #puts "AERRG: #{arg}"
469
447
  self[16] = arg
470
448
  self[12,3] = self.class.extract_file_info(arg)
471
449
  end
450
+ $VERBOSE = tmp_verb
472
451
 
473
452
  def inspect
474
- "<SpecID::Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn} prot: #{prot} base_name: #{base_name} first_scan: #{first_scan} last_scan: #{last_scan} file: #{file} peptide_probability: #{peptide_probability}>"
453
+ "<Bioworks::Pep sequence: #{sequence}, mass: #{mass}, deltamass: #{deltamass}, charge: #{charge}, xcorr: #{xcorr}, deltacn: #{deltacn}, prots(count):#{prots.size}, base_name: #{base_name}, first_scan: #{first_scan}, last_scan: #{last_scan}, file: #{file}, peptide_probability: #{peptide_probability}, aaseq:#{aaseq}>"
454
+
455
+
475
456
  end
476
457
 
477
458
  def set_from_hash(hash)
478
459
  self[0,11] = [hash["sequence"], hash["mass"], hash["deltamass"], hash["charge"], hash["xcorr"], hash["deltacn"], hash["sp"], hash["rsp"], hash["ions"], hash["count"], hash["tic"]]
479
460
  self.file = hash["file"]
480
461
  self[15] = hash["peptide_probability"].to_f
462
+ self[19] = SpecID::Pep.sequence_to_aaseq(self[0]) ## aaseq
481
463
  end
482
464
 
483
- def set_from_xml_stream(fh, prot)
484
- self[11] = prot
465
+ def set_from_xml_stream(fh)
485
466
  hash = {}
486
467
  while line = fh.gets
487
468
  if line =~ @@att_re
@@ -491,7 +472,6 @@ class SpecID::Bioworks::Pep < Array
491
472
  elsif line =~ @@end_pep_re
492
473
  set_from_hash(hash)
493
474
  #puts "SELF[12]: #{self[12]}"
494
- unless self[12] then self[12] = prot.bioworks.global_filename end
495
475
  #puts "SELF[12]: #{self[12]}"
496
476
  break
497
477
  else