mspire 0.3.1 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62)
  1. data/Rakefile +2 -2
  2. data/bin/bioworks_to_pepxml.rb +15 -3
  3. data/bin/ms_to_lmat.rb +2 -1
  4. data/bin/sqt_group.rb +26 -0
  5. data/changelog.txt +36 -0
  6. data/lib/ms/msrun.rb +3 -1
  7. data/lib/ms/parser/mzdata/dom.rb +14 -14
  8. data/lib/ms/scan.rb +3 -3
  9. data/lib/mspire.rb +1 -1
  10. data/lib/sample_enzyme.rb +39 -0
  11. data/lib/spec_id.rb +18 -0
  12. data/lib/spec_id/aa_freqs.rb +6 -9
  13. data/lib/spec_id/digestor.rb +16 -17
  14. data/lib/spec_id/mass.rb +63 -1
  15. data/lib/spec_id/parser/proph.rb +101 -2
  16. data/lib/spec_id/precision/filter.rb +3 -2
  17. data/lib/spec_id/precision/filter/cmdline.rb +3 -1
  18. data/lib/spec_id/precision/filter/output.rb +1 -0
  19. data/lib/spec_id/precision/prob.rb +88 -21
  20. data/lib/spec_id/precision/prob/cmdline.rb +28 -16
  21. data/lib/spec_id/precision/prob/output.rb +8 -2
  22. data/lib/spec_id/proph/pep_summary.rb +25 -12
  23. data/lib/spec_id/sequest.rb +28 -0
  24. data/lib/spec_id/sequest/pepxml.rb +142 -197
  25. data/lib/spec_id/sqt.rb +349 -0
  26. data/lib/spec_id/srf.rb +33 -23
  27. data/lib/validator.rb +40 -57
  28. data/lib/validator/aa.rb +3 -90
  29. data/lib/validator/aa_est.rb +112 -0
  30. data/lib/validator/cmdline.rb +163 -31
  31. data/lib/validator/decoy.rb +15 -7
  32. data/lib/validator/digestion_based.rb +5 -4
  33. data/lib/validator/q_value.rb +32 -0
  34. data/script/peps_per_bin.rb +67 -0
  35. data/script/sqt_to_meta.rb +24 -0
  36. data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
  37. data/specs/bin/fasta_shaker_spec.rb +2 -2
  38. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
  39. data/specs/bin/filter_and_validate_spec.rb +25 -6
  40. data/specs/bin/ms_to_lmat_spec.rb +2 -2
  41. data/specs/bin/prob_validate_spec.rb +5 -3
  42. data/specs/sample_enzyme_spec.rb +86 -1
  43. data/specs/spec_helper.rb +11 -9
  44. data/specs/spec_id/bioworks_spec.rb +2 -1
  45. data/specs/spec_id/precision/filter_spec.rb +5 -5
  46. data/specs/spec_id/precision/prob_spec.rb +0 -67
  47. data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
  48. data/specs/spec_id/protein_summary_spec.rb +4 -4
  49. data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
  50. data/specs/spec_id/sequest_spec.rb +38 -0
  51. data/specs/spec_id/sqt_spec.rb +111 -3
  52. data/specs/spec_id_spec.rb +2 -0
  53. data/specs/transmem/phobius_spec.rb +3 -1
  54. data/specs/transmem/toppred_spec.rb +1 -1
  55. data/specs/validator/aa_est_spec.rb +66 -0
  56. data/specs/validator/aa_spec.rb +1 -68
  57. data/specs/validator/background_spec.rb +2 -0
  58. data/specs/validator/bias_spec.rb +3 -27
  59. data/specs/validator/decoy_spec.rb +2 -2
  60. data/specs/validator/transmem_spec.rb +2 -1
  61. data/test_files/small.sqt +87 -0
  62. metadata +312 -293
data/Rakefile CHANGED
@@ -238,8 +238,8 @@ spec = Gem::Specification.new do |s|
238
238
  s.rdoc_options = rdoc_options
239
239
  s.extra_rdoc_files = rdoc_extra_includes
240
240
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
241
- s.add_dependency('libjtp', '~> 0.2.12')
242
- s.add_dependency('axml')
241
+ s.add_dependency('libjtp', '~> 0.2.13')
242
+ s.add_dependency('axml', '~> 0.0.0')
243
243
  s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
244
244
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
245
245
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
data/bin/bioworks_to_pepxml.rb CHANGED
@@ -43,14 +43,26 @@ opt_obj = OptionParser.new do |op|
43
43
  op.separator "Options:"
44
44
  op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
45
45
  op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }
46
+ op.on('--sample_enzyme <type>', "For digested samples run with no enzymatic",
47
+ "search constraint, the enzyme used for",
48
+ "digestion, options: 'Trypsin_KR_P'") {|v|
49
+ case v
50
+ when 'Trypsin_KR_P'
51
+ opt.sample_enzyme = SampleEnzyme.new("trypsin")
52
+ else
53
+ raise ArgumentError, "Don't recognize enzyme: #{v}"
54
+ end
55
+ }
56
+ op.on('-a', '--all_hits', "includes all hits, not just top xcorr") {|v| opt.all_hits = v }
57
+ op.on('--deltacn_orig', "top hit deltacn = 0.0, (no deltacnstar att)") {|v| opt.deltacn_orig = v }
58
+ op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
59
+ op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
46
60
 
47
61
  op.separator ""
48
62
  op.separator "bioworks.xml files may require additional options:"
49
63
  op.separator ""
50
64
  op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
51
65
  op.on('-d', '--dbpath path', "path to databases d: '#{DEFAULT_DATABASE_PATH}'") {|v| opt.dbpath = v }
52
- op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZ_PATH}'") {|v| opt.mspath = v }
53
- op.on('--copy_mzxml', "copies mzXML files to outdir path"){|v| opt.copy_mzxml = v }
54
66
  op.on('--model <LCQ|Orbi|string>', "MS model (xml) d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
55
67
  op.on('--mass_analyzer <string>', "Mass Analyzer (xml) d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
56
68
 
@@ -131,5 +143,5 @@ opt.params ||= DEFAULT_PARAMS_FILE
131
143
  opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER
132
144
  opt.model ||= DEFAULT_MS_MODEL
133
145
 
134
- xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true})
146
+ xml_objs = Sequest::PepXML.set_from_bioworks(bioworks_file, {:params => opt.params, :ms_data => opt.mspath, :out_path => opt.outdir, :model => model, :backup_db_path => opt.dbpath, :copy_mzxml => opt.copy_mzxml, :ms_mass_analyzer => opt.mass_analyzer, :print => true, :all_hits => opt.all_hits, :deltacn_orig => opt.deltacn_orig, :sample_enzyme => opt.sample_enzyme})
135
147
 
data/bin/ms_to_lmat.rb CHANGED
@@ -47,7 +47,8 @@ ARGV.each do |file|
47
47
  }
48
48
  args.merge!(opt)
49
49
  lmat = LMat.new.from_times_and_spectra(times, spectra, args)
50
- outfile = file.sub(/\.mzXML$/, opt[:newext])
50
+ ext = File.extname(file)
51
+ outfile = file.sub(/#{Regexp.escape(ext)}$/, opt[:newext])
51
52
  if args[:ascii]
52
53
  outfile << "a"
53
54
  lmat.print(outfile)
data/bin/sqt_group.rb ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+ require 'spec_id/sqt'
5
+
6
+ $OUTFILE = 'bioworks.sqg'
7
+
8
+ opts = OptionParser.new do |op|
9
+ op.banner = "usage: #{File.basename(__FILE__)} <file1>.sqt <file2>.sqt ..."
10
+ op.separator "outputs: 'bioworks.sqg'"
11
+ op.separator ""
12
+ op.separator " A '.sqg' file is an ascii text file with a list"
13
+ op.separator " of the sqt files (full path names) in that group."
14
+ op.separator ""
15
+ op.on('-o', '--output <filename>', 'a different output name') {|v| $OUTFILE }
16
+ end
17
+
18
+ if ARGV.size == 0
19
+ puts opts
20
+ exit
21
+ end
22
+
23
+ obj = SQTGroup.new
24
+ obj.filenames = ARGV.to_a
25
+ obj.to_sqg($OUTFILE)
26
+
data/changelog.txt CHANGED
@@ -126,3 +126,39 @@ interfaces and implementations (using ArrayClass)
126
126
  ## version 0.3.1
127
127
 
128
128
  1. Bug fix in srf filtering (num_hits adjusted)
129
+
130
+ ## version 0.3.2
131
+
132
+ 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
+ now.
134
+
135
+ ## version 0.3.3
136
+
137
+ 1. Worked out minor kinks in prob_precision.rb
138
+
139
+ ## version 0.3.4
140
+
141
+ 1. filters >= +3 charged ions now.
142
+
143
+ ## version 0.3.5
144
+
145
+ 1. fixed creation of background distribution in validators (hash_by base_name,
146
+ first_scan, charge now)
147
+
148
+ ## version 0.3.6
149
+
150
+ 1. split off bad_aa_est from bad_aa
151
+
152
+ ## version 0.3.7
153
+
154
+ 1. can deal with No_Enzyme searches now (while still capable of setting
155
+ sample_enzyme)
156
+
157
+ ## version 0.3.8
158
+
159
+ 1. can set a decoy to target ratio for decoy validation
160
+ 2. added mass calculator in Mass::Calculator
161
+
162
+ ## version 0.3.9
163
+
164
+ 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
data/lib/ms/msrun.rb CHANGED
@@ -30,7 +30,9 @@ class MS::MSRun
30
30
  myopts = opts.dup ; myopts[:msrun] = self
31
31
  if file
32
32
  filetype_and_version = MS::Parser.filetype_and_version(file)
33
- MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
33
+ parser = MS::Parser.new(filetype_and_version, :msrun)
34
+ parser.parse(file, myopts)
35
+ #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
34
36
  (@filetype, @version) = filetype_and_version
35
37
  end
36
38
  end
data/lib/ms/parser/mzdata/dom.rb CHANGED
@@ -51,23 +51,20 @@ class MS::Parser::MzData::DOM
51
51
  # %w(num msLevel retentionTime startMz endMz precursors spectrum)
52
52
 
53
53
  root = get_root_node_from_file(file)
54
- scan_count = 0
55
54
  description = root.find_first('child::description')
56
55
  bioworks33 = is_bioworks33?(description)
57
56
  spectrum_list = description.next
58
- scans =
59
- if bioworks33
60
- [] #bioworks33 gives incorrect scan numbers!
61
- else
62
- Array(spectrum_list['count'].to_i)
63
- end
57
+
58
+ scans = []
59
+
60
+ # bioworks 33 gives incorrect scan count
61
+ stated_num_scans = spectrum_list['count'].to_i
64
62
 
65
63
  # if I move from node to node, it means I've checked that it's a sequence
66
64
  # and that the elements are req'd
67
65
  if spectrum_list.child?
68
66
  spectrum_n = spectrum_list.child
69
67
  loop do
70
- scan_count += 1
71
68
  scan = MS::Scan.new(9)
72
69
  id = spectrum_n["id"].to_i
73
70
  id_to_scan_hash[id] = scan
@@ -81,11 +78,9 @@ class MS::Parser::MzData::DOM
81
78
  spec_inst_n = spec_settings_n.find_first('child::spectrumInstrument')
82
79
  scan[1] = spec_inst_n['msLevel'].to_i
83
80
 
84
- if bioworks33
85
- scans << scan # we can't trust the scan count!
86
- else
87
- scans[scan_count] = scan
88
- end
81
+ # we could use a scan_count, but in bioworks 33, we can't trust the
82
+ # scan count! So, we just collect them
83
+ scans << scan
89
84
 
90
85
  scan[3] = spec_inst_n['mzRangeStart'].to_f
91
86
  scan[4] = spec_inst_n['mzRangeStop'].to_f
@@ -149,7 +144,12 @@ class MS::Parser::MzData::DOM
149
144
  MS::MSRun.add_parent_scan(scans, opts[:spectra])
150
145
  end
151
146
  msrun_obj.scans = scans
152
- msrun_obj.scan_count = scan_count
147
+ msrun_obj.scan_count = scans.size
148
+ unless bioworks33 # we know the scan count is off here
149
+ if msrun_obj.scan_count != stated_num_scans
150
+ warn "num collected scans (#{scans.size}) does not agree with stated num scans (#{stated_num_scans})!"
151
+ end
152
+ end
153
153
  msrun_obj.start_time = msrun_obj.scans.first.time
154
154
  msrun_obj.end_time = msrun_obj.scans.last.time
155
155
  end
data/lib/ms/scan.rb CHANGED
@@ -28,7 +28,7 @@ class MS::Scan
28
28
  atts = %w(num ms_level time start_mz end_mz)
29
29
  display = atts.map do |att|
30
30
  if val = send(att.to_sym)
31
- "@#{att}=#{val}"
31
+ "#{att}=#{val}"
32
32
  else
33
33
  nil
34
34
  end
@@ -38,9 +38,9 @@ class MS::Scan
38
38
  if spectrum
39
39
  spectrum.mz.size
40
40
  else
41
- nil
41
+ 'nil'
42
42
  end
43
- "<MS::Scan:#{__id__} " + display.join(", ") + "@precursors=#{precursors.inspect}" + "@spectrum=size:#{spec_display}" + ">"
43
+ "<MS::Scan:#{__id__} " + display.join(", ") + " precursors=#{precursors.inspect}" + " spectrum(size)=#{spec_display}" + " >"
44
44
  end
45
45
 
46
46
  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
data/lib/mspire.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Mspire
3
- Version = '0.3.1'
3
+ Version = '0.3.9'
4
4
  end
data/lib/sample_enzyme.rb CHANGED
@@ -23,6 +23,7 @@ class SampleEnzyme
23
23
  # For other enzymes, you must set :cut, :no_cut, :name, and :sense
24
24
  # will yield the object if you want to set the values that way
25
25
  def initialize(name=nil)
26
+ @num_missed_cleavages_regex = nil
26
27
  @sense = nil
27
28
  @cut = nil
28
29
  @no_cut = nil
@@ -62,6 +63,44 @@ class SampleEnzyme
62
63
  self.new.from_pepxml_node(node)
63
64
  end
64
65
 
66
+ # takes an amino acid sequence (e.g., -.PEPTIDK.L)
67
+ # returns the number of missed cleavages
68
+ def num_missed_cleavages(aaseq)
69
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
70
+ @num_missed_cleavages_regex =
71
+ if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
72
+ else
73
+ regex_string = "[#{@cut}]"
74
+ if @no_cut and @no_cut != ''
75
+ regex_string << "[^#{@no_cut}]"
76
+ end
77
+ /#{regex_string}/
78
+ end
79
+ arr = aaseq.scan(@num_missed_cleavages_regex)
80
+ num = arr.size
81
+ if aaseq[-1,1] =~ @num_missed_cleavages_regex
82
+ num -= 1
83
+ end
84
+ num
85
+ end
86
+
87
+ # requires full sequence (with heads and tails)
88
+ def num_tol_term(sequence)
89
+ raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
90
+ no_cut = @no_cut || ''
91
+ num_tol = 0
92
+ first, middle, last = SpecID::Pep.split_sequence(sequence)
93
+ last_of_middle = middle[-1,1]
94
+ first_of_middle = middle[0,1]
95
+ if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
96
+ num_tol += 1
97
+ end
98
+ if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
99
+ num_tol += 1
100
+ end
101
+ num_tol
102
+ end
103
+
65
104
  # returns all peptides of missed cleavages <= 'missed_cleavages'
66
105
  # so 2 missed cleavages will return all no missed cleavage peptides
67
106
  # all 1 missed cleavages and all 2 missed cleavages.
data/lib/spec_id.rb CHANGED
@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
7
7
  require 'spec_id/sequest'
8
8
  require 'spec_id/proph/prot_summary'
9
9
  require 'spec_id_xml'
10
+ require 'spec_id/sqt'
10
11
  require 'spec_id/mass'
11
12
  require 'fasta'
12
13
 
@@ -71,6 +72,10 @@ module SpecID
71
72
  Proph::ProtSummary.new(file)
72
73
  when 'pepproph'
73
74
  Proph::PepSummary.new(file)
75
+ when 'sqg'
76
+ SQTGroup.new(file)
77
+ when 'sqt'
78
+ SQTGroup.new([file])
74
79
  else
75
80
  abort "UNRECOGNIZED file type for #{file}"
76
81
  end
@@ -447,6 +452,8 @@ module SpecID
447
452
  def self.file_type(file)
448
453
  if file =~ /\.srg$/
449
454
  return 'srg'
455
+ elsif file =~ /\.sqg$/
456
+ return 'sqg'
450
457
  end
451
458
  if IO.read(file, 7,438) == 'Enzyme:'
452
459
  return 'srf'
@@ -461,6 +468,17 @@ module SpecID
461
468
  elsif lines =~ /<msms_pipeline_analysis.*<peptideprophet_summary/m
462
469
  return 'pepproph'
463
470
  end
471
+ # assumes the header of a sqt file is less than 200 lines ...
472
+ 200.times do
473
+ line = fh.gets
474
+ if line
475
+ lines << line
476
+ else ; break
477
+ end
478
+ end
479
+ if lines =~ /^H\tDatabase/ and lines =~ /^H\tSQTGenerator/
480
+ return 'sqt'
481
+ end
464
482
  end
465
483
  end
466
484
 
data/lib/spec_id/aa_freqs.rb CHANGED
@@ -3,30 +3,27 @@ require 'fasta'
3
3
  module SpecID ; end
4
4
 
5
5
  class SpecID::AAFreqs
6
- # a fasta object
7
- attr_accessor :fasta
8
6
  # hash by capital one-letter amino acid symbols giving the frequency of
9
7
  # seeing that amino acid. Frequencies should add to 1.
10
8
  attr_accessor :aafreqs
11
9
 
12
10
  # fasta is fasta object!
13
11
  def initialize(fasta=nil)
14
- @fasta = fasta
15
- if @fasta
16
- @aafreqs = calculate_frequencies(@fasta)
12
+ if fasta
13
+ @aafreqs = calculate_frequencies(fasta.prots)
17
14
  end
18
15
  end
19
16
 
20
- # creates an aafreqs hash based on fasta object
21
- def calculate_frequencies(fasta)
17
+ # takes an enumerable of objects responding to :aaseq and creates an aafreqs hash
18
+ def calculate_frequencies(objs)
22
19
  hash = {}
23
20
  total_aas = 0
24
21
  ('A'..'Z').each do |x|
25
22
  hash[x] = 0
26
23
  end
27
24
  hash['*'] = 0
28
- fasta.prots.each do |prot|
29
- aaseq = prot.aaseq
25
+ objs.each do |obj|
26
+ aaseq = obj.aaseq
30
27
  total_aas += aaseq.size
31
28
  aaseq.split('').each do |x|
32
29
  hash[x] += 1
data/lib/spec_id/digestor.rb CHANGED
@@ -100,38 +100,37 @@ class Digestor
100
100
  # The prot_aaseq is used if the mass_hash contains the keys
101
101
  # :add_C_term_protein or :add_N_term_protein
102
102
  #
103
+ # mass_hash requires the key :h_plus or :h depending on h_plus option.
103
104
  # prot_aaseqs is parallel to pep_aaseqs_ar where each is a group of
104
105
  # peptides matching a protein aaseq
105
- # returns another parallel array of passing proteins
106
+ # returns another parallel array of passing peptides per protein
106
107
  def limit_sizes(prot_aaseqs, pep_aaseqs_ar, min_mh, max_mh, mass_hash, h_plus=false)
107
108
  if mass_hash.key?(:add_C_term_protein) or mass_hash.key?(:add_N_term_protein)
108
109
  raise NotImplementedError, "need to add ability to change weights of peptides from the ends of proteins"
109
110
  else
110
111
  # figure out how much must be added to each peptide
111
112
  # include the h2o, the h, and N and C terminal static mods
112
- h_key = h_plus ? :h_plus : :h
113
- final_add = mass_hash[:h2o] + mass_hash[h_key]
113
+ h_plus_key = h_plus ? :h_plus : :h
114
+ extra_add = mass_hash[h_plus_key]
114
115
  [:add_N_term_peptide, :add_C_term_peptide].each do |sym|
115
116
  if mass_hash.key?(sym)
116
- final_add += mass_hash[sym]
117
+ extra_add += mass_hash[sym]
117
118
  end
118
119
  end
119
- hash_by_aa_string = {}
120
- mass_hash.each {|k,v| hash_by_aa_string[k.to_s] = mass_hash[k] }
120
+ mc = Mass::Calculator.new(mass_hash, extra_add)
121
+
122
+ masses_per_group = pep_aaseqs_ar.map do |pep_aaseqs|
123
+ mc.masses(pep_aaseqs)
124
+ end
121
125
 
122
- pep_aaseqs_ar.map do |pep_aaseqs|
123
- pep_aaseqs.select do |aaseq|
124
- sum = 0.0
125
- aaseq.split('').each do |let|
126
- if !hash_by_aa_string.key? let
127
- puts 'NOT FOUND'
128
- p let
129
- end
130
- sum += hash_by_aa_string[let]
126
+ masses_per_group.zip(pep_aaseqs_ar).map do |masses, aaseqs|
127
+ passing = []
128
+ aaseqs.zip(masses) do |aaseq, mh_plus|
129
+ if ( (mh_plus >= min_mh) and (mh_plus <= max_mh) )
130
+ passing << aaseq
131
131
  end
132
- mh_plus = sum + final_add
133
- ( (mh_plus >= min_mh) and (mh_plus <= max_mh) )
134
132
  end
133
+ passing
135
134
  end
136
135
  end
137
136
  end
data/lib/spec_id/mass.rb CHANGED
@@ -29,13 +29,13 @@ class Mass
29
29
  :U => 150.95364, # (selenocysteine) http://www.matrix-science.com/help/aa_help.html
30
30
  :X => 118.805716, # the average of the mono masses of the 20 amino acids
31
31
  :* => 118.805716, # same as X
32
+ :Z => (129.04259 + 128.05858) / 2, # average glutamic acid and glutamine
32
33
 
33
34
  # elements etc.
34
35
  :h => 1.00783,
35
36
  :h_plus => 1.00728,
36
37
  :o => 15.9949146,
37
38
  :h2o => 18.01056,
38
-
39
39
  }
40
40
  AVG = {
41
41
  :A => 71.0788,
@@ -64,6 +64,7 @@ class Mass
64
64
  :U => 150.03, # (selenocysteine) http://www.matrix-science.com/help/aa_help.html
65
65
  :X => 118.88603, # the average of the masses of the 20 amino acids
66
66
  :* => 118.88603, # same as X
67
+ :Z => (129.1155+ 128.1307) / 2, # average glutamic acid and glutamine
67
68
 
68
69
  # elements etc.
69
70
  :h => 1.00794,
@@ -112,5 +113,66 @@ class Mass
112
113
  end
113
114
  copy_hash
114
115
  end
116
+
117
+ # returns an array of masses parallel to array passed in
118
+ # If you want the mass with H+, then pass in the mass as h_plus
119
+ # The mass hash must respond to
120
+ # :h2o (water)
121
+ # and at least the twenty amino acids (by string or symbol)
122
+ # The mass hash may respond to :add_N_term_peptide or :add_C_term_peptide
123
+ # in which case these will be added to the final mass
124
+ def self.masses(aaseqs, mass_hash=Mass::MONO, h_plus=0.0)
125
+ final_add = mass_hash[:h2o] + h_plus
126
+ [:add_N_term_peptide, :add_C_term_peptide].each do |sym|
127
+ if mass_hash.key?(sym)
128
+ final_add += mass_hash[sym]
129
+ end
130
+ end
131
+ hash_by_aa_string = {}
132
+ mass_hash.each {|k,v| hash_by_aa_string[k.to_s] = mass_hash[k] }
133
+
134
+ aaseqs.map do |pep_aaseqs|
135
+ sum = 0.0
136
+ aaseq.split('').each do |let|
137
+ sum += hash_by_aa_string[let]
138
+ end
139
+ mh_plus = sum + final_add
140
+ end
141
+ end
142
+
143
+
144
+ end
145
+
146
+ class Mass::Calculator
147
+
148
+ # mass_hash must respond to :h2o or 'h2o'. This is added to represent the
149
+ # tails of the peptide. add_extra is outside of that (e.g., an H+)
150
+ def initialize(mass_hash, add_extra=0.0)
151
+ @mass_hash = mass_hash_to_s(mass_hash)
152
+ @final_add = @mass_hash['h2o'] + add_extra
153
+ end
154
+
155
+ def mass_hash_to_s(mass_hash)
156
+ new_hash = {}
157
+ mass_hash.each do |k,v|
158
+ new_hash[k.to_s] = v
159
+ end
160
+ new_hash
161
+ end
162
+
163
+ def masses(aaseqs)
164
+ aaseqs.map do |aaseq|
165
+ sum = @final_add # <- add in the initialization
166
+ aaseq.split('').each do |let|
167
+ if @mass_hash.key? let
168
+ sum += @mass_hash[let]
169
+ else
170
+ abort "LETTER not found in mass_hash: #{let}"
171
+ end
172
+ end
173
+ sum
174
+ end
175
+ end
176
+
115
177
  end
116
178