ms-in_silico 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/MIT-LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2008, Regents of the University of Colorado.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
4
+ software and associated documentation files (the "Software"), to deal in the Software
5
+ without restriction, including without limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7
+ to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,55 @@
1
+ = {Ms-InSilico}[http://mspire.rubyforge.org/projects/ms-in_silico]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting in-silico calculations for mass spec data.
4
+
5
+ == Description
6
+
7
+ Ms-InSilico provides the following modules:
8
+
9
+ * Ms::InSilico::Digester (protein digestion)
10
+ * Ms::InSilico::Spectrum (peptide fragmentation)
11
+
12
+ Corresponding Tap[http://tap.rubyforge.org] tasks are also provided.
13
+
14
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
15
+ * Github[http://github.com/bahuvrihi/ms-in_silico/tree/master]
16
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
17
+
18
+ == Usage
19
+
20
+ require 'ms/in_silico/digester'
21
+ require 'ms/in_silico/spectrum'
22
+ include Ms::InSilico
23
+
24
+ trypsin = Digester['Trypsin']
25
+ peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
26
+ # => [
27
+ # 'MIVIGR',
28
+ # 'SIVHPYITNEYEPFAAEK',
29
+ # 'QQILSIMAG']
30
+
31
+ spectrum = Spectrum.new(peptides[0])
32
+ spectrum.parent_ion_mass
33
+ # => 688.417442373391
34
+
35
+ spectrum.series('b')
36
+ # => [
37
+ # 132.047761058391,
38
+ # 245.131825038791,
39
+ # 344.200238954991,
40
+ # 457.284302935391,
41
+ # 514.305766658991,
42
+ # 670.406877687091]
43
+
44
+ == Installation
45
+
46
+ Ms-InSilico is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
47
+
48
+ % gem install ms-in_silico
49
+
50
+ == Info
51
+
52
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
53
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
54
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
55
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,35 @@
1
+ require 'ms/in_silico/digester'
2
+
3
+ module Ms
4
+ module InSilico
5
+ # Ms::InSilico::Digest::manifest digest a protein sequence into peptides
6
+ # Digest a protein sequence into an array of peptides.
7
+ #
8
+ # % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --+ dump --no-audit
9
+ # I[14:37:55] digest MIVIGRSIVHP... to 3 peptides
10
+ # # date: 2008-09-15 14:37:55
11
+ # ---
12
+ # ms/in_silico/digest (23483900):
13
+ # - - MIVIGR
14
+ # - SIVHPYITNEYEPFAAEK
15
+ # - QQILSIMAG
16
+ #
17
+ class Digest < Tap::Task
18
+
19
+ config :digester, 'Trypsin' # the name of the digester
20
+ config :max_misses, 0, &c.integer # the max # of missed cleavage sites
21
+ config :site_digest, false, &c.boolean # digest to sites (rather than sequences)
22
+
23
# Digests sequence using the configured digester and returns the result:
# peptide strings, or [start_index, end_index] pairs when site_digest is set.
# Raises ArgumentError when no digester is registered under the configured name.
def process(sequence)
  enzyme = Digester[digester]
  raise ArgumentError, "unknown digester: #{digester}" unless enzyme

  peptides = if site_digest
    enzyme.site_digest(sequence, max_misses)
  else
    enzyme.digest(sequence, max_misses)
  end

  # the log shows at most the first 11 residues; the ellipsis is only added
  # when residues were actually cut off
  preview = sequence.length > 11 ? "#{sequence[0..10]}..." : "#{sequence[0..10]}"
  log 'digest', "#{preview} to #{peptides.length} peptides"
  peptides
end
32
+
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,263 @@
1
+ require 'constants/library'
2
+ require 'strscan'
3
+
4
+ module Ms
5
+ module InSilico
6
+
7
+ # Digester splits a protein sequence into peptides at sites specified
8
+ # during initialization; in short Digester models a cleavage enzyme.
9
+ # Digesters support missed cleavage sites, and can return either the
10
+ # peptide strings or the cleavage sites.
11
+ #
12
+ # Digester includes {Constants::Library}[http://bioactive.rubyforge.org/constants/classes/Constants/Library.html],
13
+ # allowing access to many common digesters using Digester[]:
14
+ #
15
+ # trypsin = Digester['Trypsin']
16
+ # trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
17
+ # # => [
18
+ # # 'MIVIGR',
19
+ # # 'SIVHPYITNEYEPFAAEK',
20
+ # # 'QQILSIMAG']
21
+ #
22
+ # trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1)
23
+ # # => [
24
+ # # 'MIVIGR',
25
+ # # 'MIVIGRSIVHPYITNEYEPFAAEK',
26
+ # # 'SIVHPYITNEYEPFAAEK',
27
+ # # 'SIVHPYITNEYEPFAAEKQQILSIMAG',
28
+ # # 'QQILSIMAG'
29
+ # # ]
30
+ #
31
+ # trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1)
32
+ # # => [
33
+ # # [0,6],
34
+ # # [0,24],
35
+ # # [6,24],
36
+ # # [6,33],
37
+ # # [24,33]
38
+ # # ]
39
+ #
40
+ # ==== Enzymes
41
+ #
42
+ # Enzymes in the library were adapted from the default Mascot[http://www.matrixscience.com/]
43
+ # enzyme list. Currently supported enzymes include:
44
+ #
45
+ # * Arg-C
46
+ # * Asp-N
47
+ # * Asp-N_ambic
48
+ # * Chymotrypsin
49
+ # * CNBr
50
+ # * Lys-C
51
+ # * Lys-C/P
52
+ # * PepsinA
53
+ # * Tryp-CNBr
54
+ # * TrypChymo
55
+ # * Trypsin/P
56
+ # * V8-DE
57
+ # * V8-E
58
+ # * Trypsin
59
+ # * V8-E+Trypsin
60
+ # * V8-DE+Trypsin
61
+ #
62
+ # Several enzymes require two or more digesters, or functionality that
63
+ # is not provided by Digester, and so remain unsupported:
64
+ #
65
+ # * CNBr+Trypsin
66
+ # * Formic_acid
67
+ # * LysC+AspN
68
+ # * semiTrypsin
69
+ #
70
+ class Digester
71
+
72
+ # The name of the digester
73
+ attr_reader :name
74
+
75
+ # A string of residues at which cleavage occurs
76
+ attr_reader :cleave_str
77
+
78
+       # A c-terminal restriction residue which prevents
79
+ # cleavage at a potential cleavage site (optional).
80
+ attr_reader :cterm_exception
81
+
82
+ # True if cleavage occurs at the c-terminus of a
83
+ # cleavage residue, false if cleavage occurs at
84
+ # the n-terminus.
85
+ attr_reader :cterm_cleavage
86
+
87
+ # a multiline whitespace regexp
88
+ WHITESPACE = /\s*/m
89
+
90
def initialize(name, cleave_str, cterm_exception=nil, cterm_cleavage=true)
  # one alternative per cleavage residue; each residue is escaped so that a
  # regexp metacharacter in cleave_str is matched literally
  alternatives = cleave_str.split(//).collect {|residue| Regexp.escape(residue) }

  @name = name
  @cleave_str = cleave_str
  @cleave_regexp = Regexp.new(alternatives.join('|'))

  # nil and the empty string both mean "no restriction residue"
  @cterm_exception =
    if cterm_exception.nil? || cterm_exception.empty?
      nil
    elsif cterm_exception.length == 1
      cterm_exception[0]
    else
      raise ArgumentError, "cterm exceptions must be a single residue: #{cterm_exception}"
    end

  @cterm_cleavage = cterm_cleavage
  @scanner = StringScanner.new('')
end
107
+
108
+       # Returns the digestion sites in sequence, as determined by
109
+       # the cleave_regexp boundaries. The digestion sites correspond
110
+ # to the positions where a peptide begins and ends, such that
111
+       # [sites[n], sites[n+1] - sites[n]] corresponds to the [index, length] for peptide n.
112
+ #
113
+ # d = Digester.new('Trypsin', 'KR', 'P')
114
+ # seq = "AARGGR"
115
+ # sites = d.cleavage_sites(seq) # => [0, 3, 6]
116
+ #
117
+ # seq[sites[0], sites[0+1] - sites[0]] # => "AAR"
118
+ # seq[sites[1], sites[1+1] - sites[1]] # => "GGR"
119
+ #
120
+ # Trailing whitespace is included in the fragment.
121
+ #
122
+ # seq = "AAR \n GGR"
123
+ # sites = d.cleavage_sites(seq) # => [0, 8, 11]
124
+ #
125
+ # seq[sites[0], sites[0+1] - sites[0]] # => "AAR \n "
126
+ # seq[sites[1], sites[1+1] - sites[1]] # => "GGR"
127
+ #
128
+ # The digested section of sequence may be specified using offset
129
+ # and length.
130
def cleavage_sites(seq, offset=0, length=seq.length-offset)
  # scan reports the position just after a cleavage residue; an n-terminal
  # cleavage happens one residue earlier
  adjustment = cterm_cleavage ? 0 : 1
  limit = offset + length

  positions = [offset]
  scan(seq, offset, limit) do |pos|
    site = pos - adjustment

    # a site that coincides with the previous boundary (an n-terminal
    # cleavage residue at the very start) would yield a zero-length fragment
    positions << site if site > positions.last
  end

  # close the last fragment whenever the recorded sites stop short of limit,
  # wherever the scanner itself ended up; the length check preserves the
  # [offset, offset] result for an empty region
  positions << limit if positions.last < limit || positions.length == 1

  positions
end
146
+
147
+ # Returns digestion sites of sequence as [start_index, end_index] pairs,
148
+ # allowing for missed cleavages. Digestion sites are determined using
149
+ # cleavage_sites; as in that method, the digested section of sequence
150
+ # may be specified using offset and length.
151
+ #
152
+ # Each [start_index, end_index] pair is yielded to the block, if given,
153
+ # and the collected results are returned.
154
def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
  boundaries = cleavage_sites(seq, offset, length)

  # overlay hands back indexes into boundaries; translate them into
  # positions within seq before yielding or collecting
  overlay(boundaries.length, max_misses, 1) do |first, last|
    span = [boundaries[first], boundaries[last]]
    if block_given?
      yield(span[0], span[1])
    else
      span
    end
  end
end
164
+
165
+ # Returns an array of peptides produced by digesting sequence, allowing for
166
+ # missed cleavage sites. Digestion sites are determined using cleavage_sites;
167
+ # as in that method, the digested section of sequence may be specified using
168
+ # offset and length.
169
def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
  peptides = []
  # each site pair is [start_index, end_index]; slice by start and length
  site_digest(seq, max_misses, offset, length).each do |start, stop|
    peptides << seq[start, stop - start]
  end
  peptides
end
174
+
175
+ protected
176
+
177
+ # The cleavage regexp used to identify cleavage sites
178
+ attr_reader :cleave_regexp # :nodoc:
179
+
180
+ # The scanner used to digest strings.
181
+ attr_reader :scanner # :nodoc:
182
+
183
+ # Scans seq between offset and limit for the cleave_regexp, skipping whitespace
184
+ # and being mindful of exception characters. The positions of the scanner at
185
+ # each match are yielded to the block.
186
def scan(seq, offset, limit) # :nodoc:
  scanner.string = seq
  scanner.pos = offset

  while scanner.search_full(cleave_regexp, true, false)
    # swallow whitespace after the cleavage residue so that it stays
    # attached to the preceding fragment
    scanner.search_full(WHITESPACE, true, false)
    site = scanner.pos

    # a site followed by the exception residue is not cleaved
    if cterm_exception.nil? || seq[site] != cterm_exception
      # stop once the scanner has run past the upper limit
      break if site > limit

      yield site
    end
  end

  scanner.pos
end
205
+
206
+ # Performs an overlap-collect algorithm providing the start and end
207
+       # indices of spans skipping up to max_misses boundaries.
208
def overlay(n, max_misses, offset) # :nodoc:
  spans = []
  n.times do |first|
    (0..max_misses).each do |skipped|
      last = first + offset + skipped

      # no further spans start at first once the end index reaches n
      break if last == n

      spans << yield(first, last)
    end
  end
  spans
end
220
+
221
+ #
222
+ # Enzymes adapted from the default Mascot enzyme list.
223
+ #
224
+
225
+ class << self
226
+ protected
227
+
228
+ # Utility method to parse a mascot enzyme configuration
229
+ # string into a Digester.
230
def mascot_parse(str) # :nodoc:
  # tab-separated fields; the trailing independent and semi-specific
  # flags are not used by Digester
  fields = str.split(/ *\t */)
  name, sense, cleave_str, cterm_exception = fields[0], fields[1], fields[2], fields[3]

  cterm_cleavage =
    if sense == 'C-Term'
      true
    elsif sense == 'N-Term'
      false
    else
      raise ArgumentError, "unknown sense: #{sense}"
    end

  new(name, cleave_str, cterm_exception, cterm_cleavage)
end
240
+ end
241
+
242
+ ARG_C = mascot_parse('Arg-C C-Term R P no no')
243
+ ASP_N = mascot_parse('Asp-N N-Term BD no no')
244
+ ASP_N_AMBIC = mascot_parse('Asp-N_ambic N-Term DE no no')
245
+ CHYMOTRYPSIN = mascot_parse('Chymotrypsin C-Term FLWY P no no')
246
+ CNBR = mascot_parse('CNBr C-Term M no no')
247
+ LYS_C = mascot_parse('Lys-C C-Term K P no no')
248
+ LYS_C_P = mascot_parse('Lys-C/P C-Term K no no')
249
+ PEPSIN_A = mascot_parse('PepsinA C-Term FL no no')
250
+ TRYP_CNBR = mascot_parse('Tryp-CNBr C-Term KMR P no no')
251
+ TRYP_CHYMO = mascot_parse('TrypChymo C-Term FKLRWY P no no')
252
+ TRYPSIN_P = mascot_parse('Trypsin/P C-Term KR no no')
253
+ V8_DE = mascot_parse('V8-DE C-Term BDEZ P no no')
254
+ V8_E = mascot_parse('V8-E C-Term EZ P no no')
255
+ TRYPSIN = mascot_parse('Trypsin C-Term KR P no no')
256
+ V8_E_TRYPSIN = mascot_parse('V8-E+Trypsin C-Term EKRZ P no no')
257
+ V8_DE_TRYPSIN = mascot_parse('V8-DE+Trypsin C-Term BDEKRZ P no no')
258
+
259
+ include Constants::Library
260
+ library.index_by_attribute :name
261
+ end
262
+ end
263
+ end
@@ -0,0 +1,74 @@
1
+ require 'ms/in_silico/spectrum'
2
+
3
+ module Ms
4
+ module InSilico
5
+
6
+ # Ms::InSilico::Fragment::manifest calculates a theoretical ms/ms spectrum
7
+ #
8
+ # Calculates the parent ion mass and theoretical ms/ms spectrum for a
9
+ # peptide sequence. Configurations allow the specification of one or
10
+ # more fragmentation series to include, as well as charge, and intensity.
11
+ #
12
+ # % rap fragment TVQQEL --+ dump --no-audit
13
+ # # date: 2008-09-15 14:37:55
14
+ # ---
15
+ # ms/in_silico/fragment (:...:):
16
+ # - - 717.377745628191
17
+ # - - 102.054954926291
18
+ # - 132.101905118891
19
+ # - 201.123368842491
20
+ # - 261.144498215091
21
+ # - 329.181946353891
22
+ # - 389.203075726491
23
+ # - 457.240523865291
24
+ # - 517.261653237891
25
+ # - 586.283116961491
26
+ # - 616.330067154091
27
+ # - 699.367180941891
28
+ # - 717.377745628191
29
+ #
30
+ # In the output, the parent ion mass is given first, followed by an
31
+ # array of the sorted fragmentation data.
32
+ class Fragment < Tap::Task
33
+
34
+ # A block to validate a config input
35
+       # is an EmpiricalFormula.
36
+ MOLECULE = lambda do |value|
37
+ case value
38
+ when Molecules::EmpiricalFormula then value
39
+ else Molecules::EmpiricalFormula.parse(value)
40
+ end
41
+ end
42
+
43
+ config :series, ['y', 'b'], &c.array # a list of the series to include
44
+ config :charge, 1, &c.integer # the charge for the parent ion
45
+ config :intensity, nil, &c.num_or_nil # a uniform intensity value
46
+ config :nterm, 'H', &MOLECULE # the n-terminal modification
47
+ config :cterm, 'OH', &MOLECULE # the c-terminal modification
48
+ config :sort, true, &c.switch # sorts the data by mass
49
+ config :unmask, true, &c.switch # remove masked (negative) masses
50
+
51
# Returns [parent_ion_mass, masses] for peptide, where masses holds the
# configured series, optionally unmasked, sorted, and paired with intensity.
def process(peptide)
  log :fragment, peptide
  spec = spectrum(peptide)

  # gather every requested series into a single flat list
  masses = series.inject([]) {|collected, name| collected.concat(spec.series(name)) }

  # masked masses are negative
  masses = masses.reject {|m| m < 0 } if unmask
  masses = masses.sort if sort
  masses = masses.collect {|m| [m, intensity] } if intensity

  [spec.parent_ion_mass(charge), masses]
end
63
+
64
+ protected
65
+
66
+ # Returns a new Spectrum used in the calculation.
67
+ # Primarily a hook for custom spectra in subclasses.
68
+ def spectrum(peptide)
69
+ Spectrum.new(peptide, nterm, cterm)
70
+ end
71
+
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,450 @@
1
+ require 'molecules/libraries/residue'
2
+ require 'constants/libraries/particle'
3
+ require 'ms/in_silico'
4
+
5
+ module Ms
6
+ module InSilico
7
+
8
+ # Spectrum calculates the theoretical ion series produced by a fragmentation
9
+     # process such as collision induced dissociation (CID). The formulae used to
10
+ # calculate the ion series were obtained from the {Matrix Science
11
+ # website}[http://www.matrixscience.com/]. Spectrum uses the
12
+ # {Constants}[http://bioactive.rubyforge.org/constants/] gem as the default
13
+ # source of element and particle masses.
14
+ #
15
+ # spec = Ms::InSilico::Spectrum.new('TVQQEL')
16
+ # spec.series('b')
17
+ # # => [
18
+ # # 102.054954926291,
19
+ # # 201.123368842491,
20
+ # # 329.181946353891,
21
+ # # 457.240523865291,
22
+ # # 586.283116961491,
23
+ # # 699.367180941891]
24
+ #
25
+ # spec.series('y')
26
+ # # => [
27
+ # # 717.377745628191,
28
+ # # 616.330067154091,
29
+ # # 517.261653237891,
30
+ # # 389.203075726491,
31
+ # # 261.144498215091,
32
+ # # 132.101905118891]
33
+ #
34
+ # ==== Formulae to Calculate Fragment Ion m/z values
35
+ #
36
+ # <em>Copied directly from the Matrix Science {fragmentation help
37
+ # section}[http://www.matrixscience.com/help/fragmentation_help.html]</em>
38
+ #
39
+ # [N] is the molecular mass of the neutral N-terminal group, [C] is the
40
+ # molecular mass of the neutral C-terminal group, [M] is molecular mass
41
+ # of the neutral amino acid residues. To obtain m/z values, add or
42
+ # subtract protons as required to obtain the required charge and divide
43
+ # by the number of charges. For example, to get a+, add 1 proton to the
44
+ # Mr value for a. To get a--, subtract 2 protons from the Mr value for
45
+ # a and divide by 2.
46
+ #
47
+ # Ion Type Neutral Mr
48
+ # a [N]+[M]-CHO
49
+ # a* a-NH3
50
+     # a° a-H2O
51
+ # b [N]+[M]-H
52
+ # b* b-NH3
53
+     # b° b-H2O
54
+ # c [N]+[M]+NH2
55
+ # d a - partial side chain
56
+ # v y - complete side chain
57
+ # w z - partial side chain
58
+ # x [C]+[M]+CO-H
59
+ # y [C]+[M]+H
60
+ # y* y-NH3
61
+     # y° y-H2O
62
+ # z [C]+[M]-NH2
63
+ #
64
+ # ==== Use of alternate masses
65
+ # By default a Spectrum will calculate the ion series' using the
66
+ # monoisotopic masses for each element. To calculate masses
67
+ # differently, provide a block to new; each Element will be
68
+ # passed to the block as needed, and the block should return
69
+ # the element mass used in the calculation.
70
+ #
71
+ # Alternatively, a subclass can override the mass method; all
72
+ # objects that need to be turned into a mass (nterm, cterm,
73
+ # a variety of molecules specified as strings, the elements,
74
+ # ELECTRON, etc) are passed to mass to yield the value used
75
+ # in any given calculation.
76
+ #
77
+ #--
78
+ # ALL of the collections could be sped up using inline
79
+ #++
80
+ class Spectrum
81
+ include Molecules
82
+ include Molecules::Libraries
83
+ include Constants::Libraries
84
+
85
+ class << self
86
+
87
+ def inherited(base)
88
+ base.instance_variable_set(:@residues_to_locate, @residues_to_locate.dup)
89
+ end
90
+
91
+ # A string of residues located by scan.
92
+ attr_accessor :residues_to_locate
93
+
94
+ # Adds residues to residues_to_locate (these residues
95
+ # will be located by scan). Generally used when some
96
+ # special fragmentation behavior occurs at specific
97
+ # residues. By default no residues are located.
98
+ #
99
+ # class Subclass < Spectrum
100
+ # locate_residues "PS"
101
+ # end
102
+ #
103
+ # Subclass.new('RPPGFSPFR').residue_locations
104
+ # # => {'P' => [1, 2, 6], 'S' => [5]}
105
+ #
106
+ # Calls to locate_residues are cumulative.
107
+ def locate_residues(residues)
108
+ @residues_to_locate += residues
109
+ end
110
+
111
+ # Scans the sequence to produce a ladder of masses and a
112
+ # hash of (residue, locations) pairs which indicate the
113
+         # indices at which the residue occurs in sequence. The
114
+ # ladder corresponds to the M values described above.
115
+ #
116
+ # Returns [ladder, {residue => locations}].
117
+ #
118
+ # ==== Inputs
119
+ # sequence:: a string
120
+ # masses_by_byte:: an array of masses where the index of
121
+ # the mass is the byte of the
122
+ # corresponding residue.
123
+ # residues_to_locate:: a string of the residues to locate.
124
+ #
125
+ # Note: scan is an optimized utility function, but should
126
+ # be replaced by an inline function to do the same.
127
+ #
128
def scan(sequence, masses_by_byte, residues_to_locate)
  # locations is indexed by byte; only located residues get an array
  locations = []
  residues_to_locate.each_byte {|byte| locations[byte] = [] }

  mass = 0
  ladder = []
  sequence.each_byte do |byte|
    mass += masses_by_byte[byte]
    location = locations[byte]

    # ladder.length is the index of the current residue in sequence
    location << ladder.length if location
    ladder << mass
  end

  # key the results by residue letter; working from bytes avoids String#[]
  # with one index, which gives a byte on ruby 1.8 but a string on 1.9+
  hash = {}
  residues_to_locate.each_byte do |byte|
    hash[byte.chr] = locations[byte]
  end

  [ladder, hash]
end
151
+ end
152
+
153
+ HYDROGEN = EmpiricalFormula.parse("H")
154
+ HYDROXIDE = EmpiricalFormula.parse("OH")
155
+ ELECTRON = Particle['Electron']
156
+
157
+ self.residues_to_locate = ""
158
+
159
+ # The peptide sequence.
160
+ attr_reader :sequence
161
+
162
+ # The n-terminal modification (default H)
163
+ attr_reader :nterm
164
+
165
+ # The c-terminal modification (default OH)
166
+ attr_reader :cterm
167
+
168
+ # An optional block used to calculate masses of molecules.
169
+ attr_reader :block
170
+
171
+ # A ladder of mass values corresponding to the
172
+ # M values used in the fragmentation formulae.
173
+ attr_reader :ladder
174
+
175
+ # A hash of (residue, [locations]) pairs where
176
+       # the locations are the indices in sequence
177
+ # at which residue occurs.
178
+ attr_reader :residue_locations
179
+
180
+ # Initializes a new Spectrum using the specified n- and c-terminal
181
+ # modifications. Masses will be calculated using the block, if
182
+       # specified. If no block is specified, then the monoisotopic
183
+ # masses will be used.
184
def initialize(sequence, nterm=HYDROGEN, cterm=HYDROXIDE, &block) # :yields: element
  @sequence, @nterm, @cterm, @block = sequence, nterm, cterm, block

  # one mass per slot of Residue.residue_index; empty slots weigh 0
  masses_by_byte = Residue.residue_index.collect do |residue|
    residue == nil ? 0 : mass(residue)
  end

  scanned = self.class.scan(sequence, masses_by_byte, self.class.residues_to_locate)
  @ladder, @residue_locations = scanned

  # series and masks are filled in later
  @series_hash = {}
  @series_mask = {}
end
203
+
204
+ # Returns the mass of the parent ion for the sequence, given the charge.
205
def parent_ion_mass(charge=1)
  # neutral peptide: n-terminal group + all residues + c-terminal group
  neutral = mass(nterm) + ladder.last + mass(cterm)
  (neutral + charge * proton_mass) / charge
end
208
+
209
+ # Returns the mass of a proton (ie Hydrogen minus an Electron)
210
+ def proton_mass
211
+ mass(HYDROGEN) - mass(ELECTRON)
212
+ end
213
+
214
+       # Retrieves the specified series, assuming a charge of 1. A different charge
215
+ # can be specified for the series by using '+' and '-'. For example:
216
+ #
217
+ # f = Spectrum.new 'RPPGFSPFR'
218
+ # f.series('y') == f.y_series # => true
219
+ # f.series('b++') == f.b_series(2) # => true
220
+ # f.series('nladder-') == f.nladder_series(-1) # => true
221
+ #
222
+ # Series raises an error if the specified charge is zero.
223
def series(s)
  s = s.to_s.strip

  # \A and \z anchor the whole string; ^ and $ would accept a valid series
  # appearing on any single line of multi-line input
  match = /\A(immonium|nladder|cladder|[abcxyYz])(\+*)(-*)(\s[\+\-\s\w\d]+)?\z/.match(s)
  return handle_unknown_series(s) unless match

  type, plus, minus = match[1], match[2], match[3]

  # the optional modification (ex: ' - H2O') is passed on without whitespace
  mod = match[4].to_s.gsub(/\s/, "")

  # no sign at all means a charge of 1; otherwise each '+' adds one
  # charge and each '-' removes one
  charge = (plus.empty? && minus.empty?) ? 1 : plus.length - minus.length
  raise ArgumentError, "zero charge specified in series: #{s}" if charge == 0

  send("#{type}_series", charge, mod)
end
247
+
248
# One ion per residue: the mass difference between consecutive ladder
# entries, plus mass(mod), minus mass('CO'), with protons for charge.
def immonium_series(charge=1, mod=nil)
  get_series(:immonium, charge, mod) do
    shift = mass(mod) - mass('CO')

    ions = []
    ladder.each_with_index do |current, index|
      # the ladder is cumulative; the first residue has nothing before it
      previous = index.zero? ? 0 : ladder[index - 1]
      ions << (current - previous + shift + charge * proton_mass) / charge
    end
    ions
  end
end
261
+
262
+ # [N]+[M]-CHO
263
def a_series(charge=1, mod=nil)
  get_series(:a, charge, mod) do
    # neutral a ion is [N]+[M]-CHO; the protons for charge are folded into
    # the shift before nterm_series divides by charge
    shift = mass(mod) + mass(nterm) - mass('CHO') + charge * proton_mass
    nterm_series(shift, charge)
  end
end
269
+
270
+ # [N]+[M]-H
271
def b_series(charge=1, mod=nil)
  get_series(:b, charge, mod) do
    # neutral b ion is [N]+[M]-H; the protons for charge are folded into
    # the shift before nterm_series divides by charge
    shift = mass(mod) + mass(nterm) - mass('H') + charge * proton_mass
    nterm_series(shift, charge)
  end
end
277
+
278
+ # [N]+[M]+NH2
279
def c_series(charge=1, mod=nil)
  get_series(:c, charge, mod) do
    # neutral c ion is [N]+[M]+NH2; the protons for charge are folded into
    # the shift before nterm_series divides by charge
    shift = mass(mod) + mass(nterm) + mass('NH2') + charge * proton_mass
    nterm_series(shift, charge)
  end
end
285
+
286
+       # [M]+H2O
287
+ #--
288
+ # Ask Peter about these as well... Currently I'm adding water to
289
+ # cap the ends, as if a hydrolysis reaction produced the ladder,
290
+ # then I'm adding H for charge... is this what is intended?
291
+ # Why not cladder[0] or cladder[-1]?
292
+ #++
293
def cladder_series(charge=1, mod=nil)
  get_series(:cladder, charge, mod) do
    # each ladder entry is capped with water; the protons for charge are
    # folded into the shift before nterm_series divides by charge
    shift = mass(mod) + mass('H2O') + charge * proton_mass
    nterm_series(shift, charge)
  end
end
299
+
300
+ # [C]+[M]+CO-H
301
def x_series(charge=1, mod=nil)
  get_series(:x, charge, mod) do
    # neutral x ion is [C]+[M]+CO-H, counted back from the full residue
    # mass (ladder.last); the protons for charge are folded into the total
    total = mass(mod) + ladder.last + mass(cterm) + mass('CO - H') + charge * proton_mass
    cterm_series(total, charge)
  end
end
307
+
308
+ # [C]+[M]+H
309
def y_series(charge=1, mod=nil)
  get_series(:y, charge, mod) do
    # neutral y ion is [C]+[M]+H, counted back from the full residue
    # mass (ladder.last); the protons for charge are folded into the total
    total = mass(mod) + ladder.last + mass(cterm) + mass('H') + charge * proton_mass
    cterm_series(total, charge)
  end
end
315
+
316
+ # [C]+[M]-H
317
def Y_series(charge=1, mod=nil)
  get_series(:Y, charge, mod) do
    # neutral Y ion is [C]+[M]-H, counted back from the full residue
    # mass (ladder.last); the protons for charge are folded into the total
    total = mass(mod) + ladder.last + mass(cterm) - mass('H') + charge * proton_mass
    cterm_series(total, charge)
  end
end
323
+
324
+ # [C]+[M]-NH2
325
def z_series(charge=1, mod=nil)
  get_series(:z, charge, mod) do
    # neutral z ion is [C]+[M]-NH2, counted back from the full residue
    # mass (ladder.last); the protons for charge are folded into the total
    total = mass(mod) + ladder.last + mass(cterm) - mass('NH2') + charge * proton_mass
    cterm_series(total, charge)
  end
end
331
+
332
+       # [M]+H2O
333
+ #--
334
+ # Ask Peter about these as well... Currently I'm adding water to
335
+ # cap the ends, as if a hydrolysis reaction produced the ladder,
336
+ # then I'm adding H for charge... is this what is intended?
337
+ # Why not nladder[-1]?
338
+ #++
339
def nladder_series(charge=1, mod=nil)
  get_series(:nladder, charge, mod) do
    # the full residue mass (ladder.last) capped with water; the protons
    # for charge are folded into the total before cterm_series divides
    total = mass(mod) + ladder.last + mass('H2O') + charge * proton_mass
    cterm_series(total, charge)
  end
end
345
+
346
+ protected
347
+
348
+ # A hash holding all calculated series for self. Series are keyed
349
+ # by the type and charge of the series (ex: b1, b2, y1, y2).
350
+ attr_accessor :series_hash
351
+
352
+ # A hash holding the locations of residues that need to be masked (ie
353
+ # multiplied by -1) in a given series. Mask locations should be unique
354
+ # so that a given location will not be masked twice; the method
355
+ # mask_locations can assist in doing so. Series masks are keyed
356
+ # by the series type (ex: b, y).
357
+ attr_accessor :series_mask
358
+
359
+ # Calculates the mass of the molecule for a variety of input
360
+ # types:
361
+ #
362
+ # EmpiricalFormula molecule.mass(&block)
363
+ # Particle molecule.mass
364
+ # String EmpiricalFormula.mass(molecule, &block)
365
+ # Numeric molecule
366
+ # nil 0
367
+ #
368
def mass(molecule)
  if molecule.nil?
    # a missing molecule contributes nothing
    0
  elsif molecule.kind_of?(EmpiricalFormula) || molecule.kind_of?(Particle)
    # the block is passed along in both cases
    molecule.mass(&block)
  elsif molecule.kind_of?(String)
    EmpiricalFormula.mass(molecule, &block)
  elsif molecule.kind_of?(Numeric)
    # numbers are taken as masses already
    molecule
  else
    raise "cannot calculate mass of: #{molecule}"
  end
end
382
+
383
+ # Generates an n-terminal series (ex: a, b, or c) by adding delta
384
+ # to each element from ladder, and dividing by charge. Delta,
385
+ # therefore, should ALREADY take account of the protons added
386
+ # by charge.
387
def nterm_series(delta, charge)
  ions = []
  # delta already includes the protons for charge
  ladder.each {|residue_sum| ions << (residue_sum + delta) / charge }
  ions
end
390
+
391
+ # Generates a c-terminal series (ex: x, y, or z) by subtracting each
392
+ # element from ladder from delta, and dividing by charge. Delta,
393
+ # therefore, should ALREADY take account of the protons added
394
+ # by charge.
395
def cterm_series(delta, charge)
  # the first ion keeps every residue; each later ion drops the residues
  # counted so far in the ladder
  ions = [delta / charge]
  ladder.each {|residue_sum| ions << (delta - residue_sum) / charge }

  # drop the last entry, where the whole ladder has been subtracted
  ions.pop
  ions
end
401
+
402
+ # Adds the specified locations to the series mask, ensuring that the
403
+ # specified locations will be unique within the mask. If overwrite
404
+ # is true, then the input locations will overwrite any existing mask
405
+ # locations.
406
def mask_locations(series, locations, overwrite=false)
  # negative locations count back from the end of the ladder
  locations = locations.collect do |location|
    location < 0 ? ladder.length + location : location
  end

  if overwrite
    series_mask[series] = locations.uniq
  else
    # uniq! returns nil when it removes nothing, so the mask itself is
    # returned; both branches now return the mask for series
    current = (series_mask[series] ||= [])
    current.concat(locations).uniq!
    current
  end
end
417
+
418
+       # Retrieves the series keyed by "#{key}#{charge}#{mod}" in series_hash.
419
+ # If the series has not been initialized, the series will be
420
+ # initialized using the supplied block, and masked using the
421
+ # series_mask indicated by key (not "#{key}#{charge}").
422
def get_series(key, charge=nil, mod=nil)
  cache_key = "#{key}#{charge}#{mod}"
  cached = series_hash[cache_key]
  return cached if cached

  # the block only runs on a cache miss; the mask is looked up by key
  # and mod, not by charge
  series_hash[cache_key] = mask(yield, key, mod)
end
425
+
426
+ # Mask the locations in the series by multiplying them by -1. Mask
427
+ # does NOT check to see if the location is negative or positive.
428
def mask(series, key, mod)
  # start from an empty list so that a mod-specific mask still applies
  # when the series itself has no mask
  locations = series_mask[key] || []

  unless mod.nil?
    mod_locations = series_mask["#{key}#{mod}"]

    # | builds a new, duplicate-free array and leaves the stored masks
    # untouched
    locations |= mod_locations if mod_locations
  end

  locations.each {|i| series[i] *= -1 }
  series
end
442
+
443
+ # Hook to custom-handle an unknown series from the series method.
444
+ # By default, handle_unknown_series raises an ArgumentError.
445
+ def handle_unknown_series(s)
446
+ raise ArgumentError, "unknown series: #{s}"
447
+ end
448
+ end
449
+ end
450
+ end
@@ -0,0 +1,4 @@
1
+ module Ms
2
+ module InSilico
3
+ end
4
+ end
data/tap.yml ADDED
File without changes
@@ -0,0 +1,5 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
+
3
+ # runs all subsets (see Tap::Test::SubsetMethods)
4
+ ENV["ALL"] = "true"
5
+ Dir.glob("./**/*_test.rb").each {|test| require test}
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-in_silico
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-20 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.11"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: molecules
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.0
34
+ version:
35
+ description:
36
+ email: simon.a.chiang@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ - MIT-LICENSE
44
+ files:
45
+ - lib/ms/in_silico.rb
46
+ - lib/ms/in_silico/digest.rb
47
+ - lib/ms/in_silico/digester.rb
48
+ - lib/ms/in_silico/fragment.rb
49
+ - lib/ms/in_silico/spectrum.rb
50
+ - tap.yml
51
+ - README
52
+ - MIT-LICENSE
53
+ has_rdoc: true
54
+ homepage: http://mspire.rubyforge.org/projects/ms-in_silico/
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: mspire
75
+ rubygems_version: 1.3.0
76
+ signing_key:
77
+ specification_version: 2
78
+ summary: ms-in_silico task library
79
+ test_files:
80
+ - test/tap_test_suite.rb