mspire 0.3.1 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. data/Rakefile +2 -2
  2. data/bin/bioworks_to_pepxml.rb +15 -3
  3. data/bin/ms_to_lmat.rb +2 -1
  4. data/bin/sqt_group.rb +26 -0
  5. data/changelog.txt +36 -0
  6. data/lib/ms/msrun.rb +3 -1
  7. data/lib/ms/parser/mzdata/dom.rb +14 -14
  8. data/lib/ms/scan.rb +3 -3
  9. data/lib/mspire.rb +1 -1
  10. data/lib/sample_enzyme.rb +39 -0
  11. data/lib/spec_id.rb +18 -0
  12. data/lib/spec_id/aa_freqs.rb +6 -9
  13. data/lib/spec_id/digestor.rb +16 -17
  14. data/lib/spec_id/mass.rb +63 -1
  15. data/lib/spec_id/parser/proph.rb +101 -2
  16. data/lib/spec_id/precision/filter.rb +3 -2
  17. data/lib/spec_id/precision/filter/cmdline.rb +3 -1
  18. data/lib/spec_id/precision/filter/output.rb +1 -0
  19. data/lib/spec_id/precision/prob.rb +88 -21
  20. data/lib/spec_id/precision/prob/cmdline.rb +28 -16
  21. data/lib/spec_id/precision/prob/output.rb +8 -2
  22. data/lib/spec_id/proph/pep_summary.rb +25 -12
  23. data/lib/spec_id/sequest.rb +28 -0
  24. data/lib/spec_id/sequest/pepxml.rb +142 -197
  25. data/lib/spec_id/sqt.rb +349 -0
  26. data/lib/spec_id/srf.rb +33 -23
  27. data/lib/validator.rb +40 -57
  28. data/lib/validator/aa.rb +3 -90
  29. data/lib/validator/aa_est.rb +112 -0
  30. data/lib/validator/cmdline.rb +163 -31
  31. data/lib/validator/decoy.rb +15 -7
  32. data/lib/validator/digestion_based.rb +5 -4
  33. data/lib/validator/q_value.rb +32 -0
  34. data/script/peps_per_bin.rb +67 -0
  35. data/script/sqt_to_meta.rb +24 -0
  36. data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
  37. data/specs/bin/fasta_shaker_spec.rb +2 -2
  38. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
  39. data/specs/bin/filter_and_validate_spec.rb +25 -6
  40. data/specs/bin/ms_to_lmat_spec.rb +2 -2
  41. data/specs/bin/prob_validate_spec.rb +5 -3
  42. data/specs/sample_enzyme_spec.rb +86 -1
  43. data/specs/spec_helper.rb +11 -9
  44. data/specs/spec_id/bioworks_spec.rb +2 -1
  45. data/specs/spec_id/precision/filter_spec.rb +5 -5
  46. data/specs/spec_id/precision/prob_spec.rb +0 -67
  47. data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
  48. data/specs/spec_id/protein_summary_spec.rb +4 -4
  49. data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
  50. data/specs/spec_id/sequest_spec.rb +38 -0
  51. data/specs/spec_id/sqt_spec.rb +111 -3
  52. data/specs/spec_id_spec.rb +2 -0
  53. data/specs/transmem/phobius_spec.rb +3 -1
  54. data/specs/transmem/toppred_spec.rb +1 -1
  55. data/specs/validator/aa_est_spec.rb +66 -0
  56. data/specs/validator/aa_spec.rb +1 -68
  57. data/specs/validator/background_spec.rb +2 -0
  58. data/specs/validator/bias_spec.rb +3 -27
  59. data/specs/validator/decoy_spec.rb +2 -2
  60. data/specs/validator/transmem_spec.rb +2 -1
  61. data/test_files/small.sqt +87 -0
  62. metadata +312 -293
data/Rakefile CHANGED
@@ -238,8 +238,8 @@ spec = Gem::Specification.new do |s|
238
238
  s.rdoc_options = rdoc_options
239
239
  s.extra_rdoc_files = rdoc_extra_includes
240
240
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
241
- s.add_dependency('libjtp', '~> 0.2.12')
242
- s.add_dependency('axml')
241
+ s.add_dependency('libjtp', '~> 0.2.13')
242
+ s.add_dependency('axml', '~> 0.0.0')
243
243
  s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
244
244
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
245
245
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
data/bin/bioworks_to_pepxml.rb CHANGED
@@ -43,14 +43,26 @@ opt_obj = OptionParser.new do |op|
43
43
  op.separator "Options:"
44
44
  op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
45
45
  op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
46
+ op.on('--sample_enzyme <type>', "For digested samples run with no enzymatic",
47
+ "search constraint, the enzyme used for",
48
+ "digestion, options: 'Trypsin_KR_P'") {|v|
49
+ case v
50
+ when 'Trypsin_KR_P'
51
+ opt.sample_enzyme = SampleEnzyme.new("trypsin")
52
+ else
53
+ raise ArgumentError, "Don't recognize enzyme: #{v}"
54
+ end
55
+ }
56
+ op.on('-a', '--all_hits', "includes all hits, not just top xcorr") {|v| opt.all_hits = v }
57
+ op.on('--deltacn_orig', "top hit deltacn = 0.0, (no deltacnstar att)") {|v| opt.deltacn_orig = v }
58
+ op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
59
+ op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
46
60
 
47
61
  op.separator ""
48
62
  op.separator "bioworks.xml files may require additional options:"
49
63
  op.separator ""
50
64
  op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
51
65
  op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
52
- op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
53
- op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
54
66
  op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
55
67
  op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
56
68
 
@@ -131,5 +143,5 @@ opt.params ||= DEFAULT_PARAMS_FILE
131
143
  opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
132
144
  opt.model ||= DEFAULT_MS_MODEL
133
145
 
134
- xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
146
+ xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true, :all_hits => opt.all_hits, :deltacn_orig => opt.deltacn_orig, :sample_enzyme => opt.sample_enzyme})
135
147
 
data/bin/ms_to_lmat.rb CHANGED
@@ -47,7 +47,8 @@ ARGV.each do |file|
47
47
  }
48
48
  args.merge!(opt)
49
49
  lmat = LMat.new.from_times_and_spectra(times, spectra, args)
50
- outfile = file.sub(/\.mzXML$/, opt[:newext])
50
+ ext = File.extname(file)
51
+ outfile = file.sub(/#{Regexp.escape(ext)}$/, opt[:newext])
51
52
  if args[:ascii]
52
53
  outfile << "a"
53
54
  lmat.print(outfile)
data/bin/sqt_group.rb ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+ require 'spec_id/sqt'
5
+
6
+ $OUTFILE = 'bioworks.sqg'
7
+
8
+ opts = OptionParser.new do |op|
9
+ op.banner = "usage: #{File.basename(__FILE__)} <file1>.sqt <file2>.sqt ..."
10
+ op.separator "outputs: 'bioworks.sqg'"
11
+ op.separator ""
12
+ op.separator " A '.sqg' file is an ascii text file with a list"
13
+ op.separator " of the sqt files (full path names) in that group."
14
+ op.separator ""
15
+ op.on('-o', '--output <filename>', 'a different output name') {|v| $OUTFILE }
16
+ end
17
+
18
+ if ARGV.size == 0
19
+ puts opts
20
+ exit
21
+ end
22
+
23
+ obj = SQTGroup.new
24
+ obj.filenames = ARGV.to_a
25
+ obj.to_sqg($OUTFILE)
26
+
data/changelog.txt CHANGED
@@ -126,3 +126,39 @@ interfaces and implementations (using ArrayClass)
126
126
  ## version 0.3.1
127
127
 
128
128
  1. Bug fix in srf filtering (num_hits adjusted)
129
+
130
+ ## version 0.3.2
131
+
132
+ 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
+ now.
134
+
135
+ ## version 0.3.3
136
+
137
+ 1. Worked out minor kinks in prob_precision.rb
138
+
139
+ ## version 0.3.4
140
+
141
+ 1. filters >= +3 charged ions now.
142
+
143
+ ## version 0.3.5
144
+
145
+ 1. fixed creation of background distribution in validators (hash_by base_name,
146
+ first_scan, charge now)
147
+
148
+ ## version 0.3.6
149
+
150
+ 1. split off bad_aa_est from bad_aa
151
+
152
+ ## version 0.3.7
153
+
154
+ 1. can deal with No_Enzyme searches now (while still capable of setting
155
+ sample_enzyme)
156
+
157
+ ## version 0.3.8
158
+
159
+ 1. can set a decoy to target ratio for decoy validation
160
+ 2. added mass calculator in Mass::Calculator
161
+
162
+ ## version 0.3.9
163
+
164
+ 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
data/lib/ms/msrun.rb CHANGED
@@ -30,7 +30,9 @@ class MS::MSRun
30
30
  myopts = opts.dup ; myopts[:msrun] = self
31
31
  if file
32
32
  filetype_and_version = MS::Parser.filetype_and_version(file)
33
- MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
33
+ parser = MS::Parser.new(filetype_and_version, :msrun)
34
+ parser.parse(file, myopts)
35
+ #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
34
36
  (@filetype, @version) = filetype_and_version
35
37
  end
36
38
  end
data/lib/ms/parser/mzdata/dom.rb CHANGED
@@ -51,23 +51,20 @@ class MS::Parser::MzData::DOM
51
51
  # %w(num msLevel retentionTime startMz endMz precursors spectrum)
52
52
 
53
53
  root = get_root_node_from_file(file)
54
- scan_count = 0
55
54
  description = root.find_first('child::description')
56
55
  bioworks33 = is_bioworks33?(description)
57
56
  spectrum_list = description.next
58
- scans =
59
- if bioworks33
60
- [] #bioworks33 gives incorrect scan numbers!
61
- else
62
- Array(spectrum_list['count'].to_i)
63
- end
57
+
58
+ scans = []
59
+
60
+ # bioworks 33 gives incorrect scan count
61
+ stated_num_scans = spectrum_list['count'].to_i
64
62
 
65
63
  # if I move from node to node, it means I've checked that it's a sequence
66
64
  # and that the elements are req'd
67
65
  if spectrum_list.child?
68
66
  spectrum_n = spectrum_list.child
69
67
  loop do
70
- scan_count += 1
71
68
  scan = MS::Scan.new(9)
72
69
  id = spectrum_n["id"].to_i
73
70
  id_to_scan_hash[id] = scan
@@ -81,11 +78,9 @@ class MS::Parser::MzData::DOM
81
78
  spec_inst_n = spec_settings_n.find_first('child::spectrumInstrument')
82
79
  scan[1] = spec_inst_n['msLevel'].to_i
83
80
 
84
- if bioworks33
85
- scans << scan # we can't trust the scan count!
86
- else
87
- scans[scan_count] = scan
88
- end
81
+ # we could use a scan_count, but in bioworks 33, we can't trust the
82
+ # scan count! So, we just collect them
83
+ scans << scan
89
84
 
90
85
  scan[3] = spec_inst_n['mzRangeStart'].to_f
91
86
  scan[4] = spec_inst_n['mzRangeStop'].to_f
@@ -149,7 +144,12 @@ class MS::Parser::MzData::DOM
149
144
  MS::MSRun.add_parent_scan(scans, opts[:spectra])
150
145
  end
151
146
  msrun_obj.scans = scans
152
- msrun_obj.scan_count = scan_count
147
+ msrun_obj.scan_count = scans.size
148
+ unless bioworks33 # we know the scan count is off here
149
+ if msrun_obj.scan_count != stated_num_scans
150
+ warn "num collected scans (#{scans.size}) does not agree with stated num scans (#{stated_num_scans})!"
151
+ end
152
+ end
153
153
  msrun_obj.start_time = msrun_obj.scans.first.time
154
154
  msrun_obj.end_time = msrun_obj.scans.last.time
155
155
  end
data/lib/ms/scan.rb CHANGED
@@ -28,7 +28,7 @@ class MS::Scan
28
28
  atts = %w(num ms_level time start_mz end_mz)
29
29
  display = atts.map do |att|
30
30
  if val = send(att.to_sym)
31
- "@#{att}=#{val}"
31
+ "#{att}=#{val}"
32
32
  else
33
33
  nil
34
34
  end
@@ -38,9 +38,9 @@ class MS::Scan
38
38
  if spectrum
39
39
  spectrum.mz.size
40
40
  else
41
- nil
41
+ 'nil'
42
42
  end
43
- "<MS::Scan:#{__id__} " + display.join(", ") + "@precursors=#{precursors.inspect}" + "@spectrum=size:#{spec_display}" + ">"
43
+ "<MS::Scan:#{__id__} " + display.join(", ") + " precursors=#{precursors.inspect}" + " spectrum(size)=#{spec_display}" + " >"
44
44
  end
45
45
 
46
46
  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
data/lib/mspire.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Mspire
3
- Version = '0.3.1'
3
+ Version = '0.3.9'
4
4
  end
data/lib/sample_enzyme.rb CHANGED
@@ -23,6 +23,7 @@ class SampleEnzyme
23
23
  # For other enzymes, you must set :cut, :no_cut, :name, and :sense
24
24
  # will yield the object if you want to set the values that way
25
25
  def initialize(name=nil)
26
+ @num_missed_cleavages_regex = nil
26
27
  @sense = nil
27
28
  @cut = nil
28
29
  @no_cut = nil
@@ -62,6 +63,44 @@ class SampleEnzyme
62
63
  self.new.from_pepxml_node(node)
63
64
  end
64
65
 
66
+ # takes an amino acid sequence (e.g., -.PEPTIDK.L)
67
+ # returns the number of missed cleavages
68
+ def num_missed_cleavages(aaseq)
69
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
70
+ @num_missed_cleavages_regex =
71
+ if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
72
+ else
73
+ regex_string = "[#{@cut}]"
74
+ if @no_cut and @no_cut != ''
75
+ regex_string << "[^#{@no_cut}]"
76
+ end
77
+ /#{regex_string}/
78
+ end
79
+ arr = aaseq.scan(@num_missed_cleavages_regex)
80
+ num = arr.size
81
+ if aaseq[-1,1] =~ @num_missed_cleavages_regex
82
+ num -= 1
83
+ end
84
+ num
85
+ end
86
+
87
+ # requires full sequence (with heads and tails)
88
+ def num_tol_term(sequence)
89
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
90
+ no_cut = @no_cut || ''
91
+ num_tol = 0
92
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
93
+ last_of_middle = middle[-1,1]
94
+ first_of_middle = middle[0,1]
95
+ if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
96
+ num_tol += 1
97
+ end
98
+ if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
99
+ num_tol += 1
100
+ end
101
+ num_tol
102
+ end
103
+
65
104
  # returns all peptides of missed cleavages <= 'missed_cleavages'
66
105
  # so 2 missed cleavages will return all no missed cleavage peptides
67
106
  # all 1 missed cleavages and all 2 missed cleavages.
data/lib/spec_id.rb CHANGED
@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
7
7
  require 'spec_id/sequest'
8
8
  require 'spec_id/proph/prot_summary'
9
9
  require 'spec_id_xml'
10
+ require 'spec_id/sqt'
10
11
  require 'spec_id/mass'
11
12
  require 'fasta'
12
13
 
@@ -71,6 +72,10 @@ module SpecID
71
72
  Proph::ProtSummary.new(file)
72
73
  when 'pepproph'
73
74
  Proph::PepSummary.new(file)
75
+ when 'sqg'
76
+ SQTGroup.new(file)
77
+ when 'sqt'
78
+ SQTGroup.new([file])
74
79
  else
75
80
  abort "UNRECOGNIZED file type for #{file}"
76
81
  end
@@ -447,6 +452,8 @@ module SpecID
447
452
  def self.file_type(file)
448
453
  if file =~ /\.srg$/
449
454
  return 'srg'
455
+ elsif file =~ /\.sqg$/
456
+ return 'sqg'
450
457
  end
451
458
  if IO.read(file, 7,438) == 'Enzyme:'
452
459
  return 'srf'
@@ -461,6 +468,17 @@ module SpecID
461
468
  elsif lines =~ /<msms_pipeline_analysis.*<peptideprophet_summary/m
462
469
  return 'pepproph'
463
470
  end
471
+ # assumes the header of a sqt file is less than 200 lines ...
472
+ 200.times do
473
+ line = fh.gets
474
+ if line
475
+ lines << line
476
+ else ; break
477
+ end
478
+ end
479
+ if lines =~ /^H\tDatabase/ and lines =~ /^H\tSQTGenerator/
480
+ return 'sqt'
481
+ end
464
482
  end
465
483
  end
466
484
 
data/lib/spec_id/aa_freqs.rb CHANGED
@@ -3,30 +3,27 @@ require 'fasta'
3
3
  module SpecID ; end
4
4
 
5
5
  class SpecID::AAFreqs
6
- # a fasta object
7
- attr_accessor :fasta
8
6
  # hash by capital one-letter amino acid symbols giving the frequency of
9
7
  # seeing that amino acid. Frequencies should add to 1.
10
8
  attr_accessor :aafreqs
11
9
 
12
10
  # fasta is fasta object!
13
11
  def initialize(fasta=nil)
14
- @fasta = fasta
15
- if @fasta
16
- @aafreqs = calculate_frequencies(@fasta)
12
+ if fasta
13
+ @aafreqs = calculate_frequencies(fasta.prots)
17
14
  end
18
15
  end
19
16
 
20
- # creates an aafreqs hash based on fasta object
21
- def calculate_frequencies(fasta)
17
+ # takes an enumerable of objects responding to :aaseq and creates an aafreqs hash
18
+ def calculate_frequencies(objs)
22
19
  hash = {}
23
20
  total_aas = 0
24
21
  ('A'..'Z').each do |x|
25
22
  hash[x] = 0
26
23
  end
27
24
  hash['*'] = 0
28
- fasta.prots.each do |prot|
29
- aaseq = prot.aaseq
25
+ objs.each do |obj|
26
+ aaseq = obj.aaseq
30
27
  total_aas += aaseq.size
31
28
  aaseq.split('').each do |x|
32
29
  hash[x] += 1
data/lib/spec_id/digestor.rb CHANGED
@@ -100,38 +100,37 @@ class Digestor
100
100
  # The prot_aaseq is used if the mass_hash contains the keys
101
101
  # :add_C_term_protein or :add_N_term_protein
102
102
  #
103
+ # mass_hash requires the key :h_plus or :h depending on h_plus option.
103
104
  # prot_aaseqs is parallel to pep_aaseqs_ar where each is a group of
104
105
  # peptides matching a protein aaseq
105
- # returns another parallel array of passing proteins
106
+ # returns another parallel array of passing peptides per protein
106
107
  def limit_sizes(prot_aaseqs, pep_aaseqs_ar, min_mh, max_mh, mass_hash, h_plus=false)
107
108
  if mass_hash.key?(:add_C_term_protein) or mass_hash.key?(:add_N_term_protein)
108
109
  raise NotImplementedError, "need to add ability to change weights of peptides from the ends of proteins"
109
110
  else
110
111
  # figure out how much must be added to each peptide
111
112
  # include the h2o, the h, and N and C terminal static mods
112
- h_key = h_plus ? :h_plus : :h
113
- final_add = mass_hash[:h2o] + mass_hash[h_key]
113
+ h_plus_key = h_plus ? :h_plus : :h
114
+ extra_add = mass_hash[h_plus_key]
114
115
  [:add_N_term_peptide, :add_C_term_peptide].each do |sym|
115
116
  if mass_hash.key?(sym)
116
- final_add += mass_hash[sym]
117
+ extra_add += mass_hash[sym]
117
118
  end
118
119
  end
119
- hash_by_aa_string = {}
120
- mass_hash.each {|k,v| hash_by_aa_string[k.to_s] = mass_hash[k] }
120
+ mc = Mass::Calculator.new(mass_hash, extra_add)
121
+
122
+ masses_per_group = pep_aaseqs_ar.map do |pep_aaseqs|
123
+ mc.masses(pep_aaseqs)
124
+ end
121
125
 
122
- pep_aaseqs_ar.map do |pep_aaseqs|
123
- pep_aaseqs.select do |aaseq|
124
- sum = 0.0
125
- aaseq.split('').each do |let|
126
- if !hash_by_aa_string.key? let
127
- puts 'NOT FOUND'
128
- p let
129
- end
130
- sum += hash_by_aa_string[let]
126
+ masses_per_group.zip(pep_aaseqs_ar).map do |masses, aaseqs|
127
+ passing = []
128
+ aaseqs.zip(masses) do |aaseq, mh_plus|
129
+ if ( (mh_plus >= min_mh) and (mh_plus <= max_mh) )
130
+ passing << aaseq
131
131
  end
132
- mh_plus = sum + final_add
133
- ( (mh_plus >= min_mh) and (mh_plus <= max_mh) )
134
132
  end
133
+ passing
135
134
  end
136
135
  end
137
136
  end
data/lib/spec_id/mass.rb CHANGED
@@ -29,13 +29,13 @@ class Mass
29
29
  :U => 150.95364, # (selenocysteine) http://www.matrix-science.com/help/aa_help.html
30
30
  :X => 118.805716, # the average of the mono masses of the 20 amino acids
31
31
  :* => 118.805716, # same as X
32
+ :Z => (129.04259 + 128.05858) / 2, # average glutamic acid and glutamine
32
33
 
33
34
  # elements etc.
34
35
  :h => 1.00783,
35
36
  :h_plus => 1.00728,
36
37
  :o => 15.9949146,
37
38
  :h2o => 18.01056,
38
-
39
39
  }
40
40
  AVG = {
41
41
  :A => 71.0788,
@@ -64,6 +64,7 @@ class Mass
64
64
  :U => 150.03, # (selenocysteine) http://www.matrix-science.com/help/aa_help.html
65
65
  :X => 118.88603, # the average of the masses of the 20 amino acids
66
66
  :* => 118.88603, # same as X
67
+ :Z => (129.1155+ 128.1307) / 2, # average glutamic acid and glutamine
67
68
 
68
69
  # elements etc.
69
70
  :h => 1.00794,
@@ -112,5 +113,66 @@ class Mass
112
113
  end
113
114
  copy_hash
114
115
  end
116
+
117
+ # returns an array of masses parallel to array passed in
118
+ # If you want the mass with H+, then pass in the mass as h_plus
119
+ # The mass hash must repond to
120
+ # :h2o (water)
121
+ # and at least the twenty amino acids (by string or symbol)
122
+ # The mass hash may respond to :add_N_term_peptide or :add_C_term_peptide
123
+ # in which case these will be added to the final mass
124
+ def self.masses(aaseqs, mass_hash=Mass::MONO, h_plus=0.0)
125
+ final_add = mass_hash[:h2o] + h_plus
126
+ [:add_N_term_peptide, :add_C_term_peptide].each do |sym|
127
+ if mass_hash.key?(sym)
128
+ final_add += mass_hash[sym]
129
+ end
130
+ end
131
+ hash_by_aa_string = {}
132
+ mass_hash.each {|k,v| hash_by_aa_string[k.to_s] = mass_hash[k] }
133
+
134
+ aaseqs.map do |pep_aaseqs|
135
+ sum = 0.0
136
+ aaseq.split('').each do |let|
137
+ sum += hash_by_aa_string[let]
138
+ end
139
+ mh_plus = sum + final_add
140
+ end
141
+ end
142
+
143
+
144
+ end
145
+
146
+ class Mass::Calculator
147
+
148
+ # mass_hash must respond to :h2o or 'h2o'. This is added to represent the
149
+ # tails of the peptide. add_extra is outside of that (e.g., an H+)
150
+ def initialize(mass_hash, add_extra=0.0)
151
+ @mass_hash = mass_hash_to_s(mass_hash)
152
+ @final_add = @mass_hash['h2o'] + add_extra
153
+ end
154
+
155
+ def mass_hash_to_s(mass_hash)
156
+ new_hash = {}
157
+ mass_hash.each do |k,v|
158
+ new_hash[k.to_s] = v
159
+ end
160
+ new_hash
161
+ end
162
+
163
+ def masses(aaseqs)
164
+ aaseqs.map do |aaseq|
165
+ sum = @final_add # <- add in the initialization
166
+ aaseq.split('').each do |let|
167
+ if @mass_hash.key? let
168
+ sum += @mass_hash[let]
169
+ else
170
+ abort "LETTER not found in mass_hash: #{let}"
171
+ end
172
+ end
173
+ sum
174
+ end
175
+ end
176
+
115
177
  end
116
178