MS-fragmenter 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/charge_calculator.rb +80 -0
- data/lib/fragmenter.rb +184 -0
- data/lib/fragmenter/masses.rb +119 -0
- data/spec/fragmenter_spec.rb +24 -0
- data/spec/spec_helper.rb +12 -0
- metadata +51 -0
@@ -0,0 +1,80 @@
|
|
1
|
+
module MS
  # Estimates the net charge of a peptide at a given pH from its termini and
  # its ionizable side chains (Y, C, K, H, R, D, E).
  #
  # Usage:
  #   charges = MS::ChargeCalculator.identify_potential_charges(peptide_sequence)
  #   MS::ChargeCalculator.charge_at_pH(charges, pH_desired)
  class ChargeCalculator
    # seq:    upper-cased sequence
    # n_term: pK used for the N-terminal amino group
    # c_term: pK used for the C-terminal carboxyl group
    # *_num:  number of Y, C, K, H, R, D and E residues in the sequence
    # pi:     never filled in by this class (left nil)
    PepCharges = Struct.new(:seq, :n_term, :c_term, :y_num, :c_num, :k_num, :h_num, :r_num, :d_num, :e_num, :pi)

    # Residue => [pK alpha-carboxyl, pK alpha-amino, pK side chain (nil when not ionizable)]
    PkTable = {
      :K => [2.18,8.95,10.53],
      :E => [2.19,9.67,4.25],
      :D => [1.88,9.60,3.65],
      :H => [1.82,9.17,6.00],
      :R => [2.17,9.04,12.48],
      :Q => [2.17,9.13,nil],
      :N => [2.02,8.80,nil],
      :C => [1.96,10.28,8.18],
      :T => [2.11,9.62,nil],
      :S => [2.21,9.15,nil],
      :W => [2.38,9.39,nil],
      :Y => [2.20,9.11,10.07],
      :F => [1.83,9.13,nil],
      :M => [2.28,9.21,nil],
      :I => [2.36,9.68,nil],
      :L => [2.36,9.60,nil],
      :V => [2.32,9.62,nil],
      :P => [1.99,10.96,nil],
      :A => [2.34,9.69,nil],
      :G => [2.34,9.60,nil],
      # These are the fringe cases... B and Z... these are harder to calculate pIs for
      :B => [1.95,9.20,3.65],
      :Z => [2.18,9.40,4.25],
      :X => [2.20,9.40,nil],
      :U => [1.96,10.28,5.20] # Only the side-chain pK was found for U, so cysteine's terminal values are used.
    }

    # Residue letter => PepCharges member that counts it.
    SideChainCounters = {
      "Y" => :y_num, "C" => :c_num, "K" => :k_num, "H" => :h_num,
      "R" => :r_num, "D" => :d_num, "E" => :e_num
    }

    # Builds the PepCharges record for a peptide sequence.
    #
    # str - peptide sequence in one-letter code (any case), N-terminus first.
    #
    # Returns a PepCharges struct.
    # Raises ArgumentError when the sequence is empty or when its first or last
    # residue has no entry in PkTable (previously this aborted the whole process).
    def self.identify_potential_charges(str)
      string = str.to_s.upcase
      raise ArgumentError, "peptide sequence is empty" if string.empty?

      first_pks = terminal_pks(string[0, 1], string)
      last_pks = terminal_pks(string[-1, 1], string)
      # The N-terminus is the free alpha-amino group (table column 1) and the
      # C-terminus is the free alpha-carboxyl group (table column 0).
      out = PepCharges.new(string, first_pks[1], last_pks[0], 0, 0, 0, 0, 0, 0, 0)
      string.each_char do |letter|
        member = SideChainCounters[letter]
        out[member] += 1 if member
      end
      out
    end # Returns the PepCharges structure

    # Looks up the pK triple of a terminal residue, failing loudly if unknown.
    def self.terminal_pks(residue, sequence)
      PkTable.fetch(residue.to_sym) do
        raise ArgumentError, "no pK values for terminal residue #{residue.inspect} in #{sequence.inspect}"
      end
    end
    private_class_method :terminal_pks

    # Net charge of the peptide described by +pep_charges+ at the given pH.
    #
    # Each acidic group contributes -n / (1 + 10**(pK - pH)) and each basic
    # group contributes +n / (1 + 10**(pH - pK)). The 1.0 literals keep the
    # arithmetic in floating point even if integer pK or pH values are passed.
    #
    # Returns a Float.
    def self.charge_at_pH(pep_charges, pH)
      acidic_groups = [
        [1, pep_charges.c_term],
        [pep_charges.d_num, PkTable[:D][2]],
        [pep_charges.e_num, PkTable[:E][2]],
        [pep_charges.c_num, PkTable[:C][2]],
        [pep_charges.y_num, PkTable[:Y][2]]
      ]
      basic_groups = [
        [1, pep_charges.n_term],
        [pep_charges.h_num, PkTable[:H][2]],
        [pep_charges.k_num, PkTable[:K][2]],
        [pep_charges.r_num, PkTable[:R][2]]
      ]
      charge = acidic_groups.inject(0) { |total, (count, pk)| total - count / (1.0 + 10**(pk - pH)) }
      basic_groups.inject(charge) { |total, (count, pk)| total + count / (1.0 + 10**(pH - pk)) }
    end #charge_at_pH
  end # class ChargeCalculator
end
|
80
|
+
|
data/lib/fragmenter.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
require_relative 'fragmenter/masses'
|
2
|
+
require_relative 'charge_calculator'
|
3
|
+
module MS
  # Generates theoretical fragment ions (m/z values) for a peptide sequence.
  #
  #   MS::Fragmenter.new.fragment("PEPTIDE") # => Array of m/z values
  class Fragmenter
    include MS # supplies charge_state and precursor_mass as instance methods

    # Array of TableEntry produced by the most recent fragmentation.
    attr_accessor :list

    TableEntry = Struct.new(:ion, :seq, :mass, :charge, :composition_arr)
    Ion_Defaults = {:b => true, :y => true}
    Defaults = {:charge_states => true, :avg => false}

    # Ion series option => ion types generated from N-terminal fragments.
    N_Term_Ion_Types = {
      :b => [:b, :b_star, :b_not],
      :a => [:a, :a_star, :a_not],
      :c => [:c]
    }
    # Ion series option => ion types generated from C-terminal fragments.
    C_Term_Ion_Types = {
      :x => [:x],
      :y => [:y, :y_star, :y_not],
      :z => [:z]
    }
    Ion_Series = N_Term_Ion_Types.keys + C_Term_Ion_Types.keys

    # opts     - :charge_states (emit every charge state up to the estimated
    #            maximum; false limits output to 1+) and :avg (use average
    #            instead of monoisotopic residue masses).
    # ion_opts - ion series switches (:a, :b, :c, :x, :y, :z => true/false),
    #            merged over Ion_Defaults.
    def initialize(opts = {}, ion_opts = {})
      set_options(opts, ion_opts)
    end

    # Replaces the current configuration with Defaults/Ion_Defaults overlaid by
    # the given hashes.
    def set_options(opts, ion_opts = {})
      @opts = Defaults.merge(opts)
      @ion_opts = Ion_Defaults.merge(ion_opts)
      @charge_states = @opts[:charge_states]
      # Recomputed for every sequence when charge states are enabled.
      @max_charge = @charge_states ? nil : 1
      @n_term_search_ion_types = []
      @c_term_search_ion_types = []
      @ion_opts.each do |series, enabled|
        next unless enabled
        @n_term_search_ion_types.concat(N_Term_Ion_Types.fetch(series, []))
        @c_term_search_ion_types.concat(C_Term_Ion_Types.fetch(series, []))
      end
      @mass_list = @opts[:avg] ? MS::AvgResidueMasses : MS::MonoResidueMasses
    end #set_options

    # Splits a sequence at every peptide bond.
    #
    # Returns [n_terminal_fragments, c_terminal_fragments]; both are empty for
    # sequences shorter than two residues.
    def calculate_fragments(sequence)
      residues = sequence.upcase
      cut_points = (1...residues.size)
      [cut_points.map { |i| residues[0, i] }, cut_points.map { |i| residues[i..-1] }]
    end

    # This fxn exists to provide the API consistent with John's request for the 689R class.
    #
    # options - optional overrides applied on top of the current configuration
    #           (and kept for later calls). Ion series keys (:a, :b, :c, :x,
    #           :y, :z) are treated as ion options, every other key as a
    #           general option (:charge_states, :avg).
    #
    # Returns an Array of fragment m/z values.
    def fragment(pep_seq, options={}) # TODO handle an intensity option to handle normalization and scaling...?
      unless options.empty?
        ion_opts, opts = options.partition { |key, _| Ion_Series.include?(key) }.map { |pairs| Hash[pairs] }
        set_options(@opts.merge(opts), @ion_opts.merge(ion_opts))
      end
      generate_fragment_masses(pep_seq)
      @list.map(&:mass)
    end

    # Builds the full ion table for +sequence+ and stores it in #list.
    #
    # Returns an Array of TableEntry (N-terminal ions first, then C-terminal
    # ions), which should be easy to use for table generation.
    # Raises ArgumentError for a residue missing from the selected mass table.
    def generate_fragment_masses(sequence)
      @sequence = sequence
      @max_charge = @charge_states ? max_charge_for(sequence) : 1
      n_terms, c_terms = calculate_fragments(sequence)
      @list = ion_entries(n_terms, MS::NTerm, @n_term_search_ion_types) +
              ion_entries(c_terms, MS::CTerm, @c_term_search_ion_types)
    end

    # Renders the ion list as MGF text, writes it to "<seq>.mgf" relative to the
    # current working directory and returns the text.
    #
    # seq - sequence to fragment first; when nil the last fragmented sequence
    #       and its ion list are reused.
    #
    # NOTE(review): the sequence is used verbatim as the file name, so it should
    # not come from untrusted input.
    def to_mgf(seq = nil)
      if seq.nil?
        raise ArgumentError, "no sequence given and no fragments generated yet" if @list.nil?
        seq = @sequence
        list = @list
      else
        list = generate_fragment_masses(seq)
      end
      intensity = 1000 # An arbitrary intensity value
      output_arr = []
      output_arr << %Q{COM=Project: In-silico Fragmenter\nBEGIN IONS\nPEPMASS=#{precursor_mass(seq.upcase, @max_charge)}\nCHARGE=#{@max_charge}+\nTITLE=Label: Sequence is #{seq}}
      list.sort_by(&:mass).each do |table_entry|
        output_arr << "#{"%.5f" % table_entry.mass }\t#{intensity}"
      end
      output_arr << "END IONS"
      text = output_arr.join("\n")
      File.open("#{seq}.mgf", 'w') {|o| o.print text }
      text
    end

    # Plots the ion list as a stick spectrum through a running Rserve instance
    # and returns the name of the PNG file written in the current directory.
    #
    # list - Array of TableEntry; defaults to the last generated list.
    def graph(list = nil)
      list ||= @list
      raise ArgumentError, "no fragments to graph; call fragment first" if list.nil?
      require 'rserve/simpler' # loaded lazily so the gem is only needed for graphing
      robj = Rserve::Simpler.new
      hash = {}
      hash["mass"] = list.map(&:mass)
      hash["intensity"] = list.map{ 1000.0} # Hacky standard intensity value
      robj.converse( masses: hash.to_dataframe) do
        "attach(masses)"
      end
      robj.converse "setwd('#{Dir.pwd}')"
      output_file_name = "#{@sequence}_spectra.png"
      robj.converse do
        %Q{png(file='#{output_file_name}')
          plot(masses$mass, masses$intensity, type='h')
          dev.off()
        }
      end
      output_file_name
    end

    private

    # Highest charge state to report: the estimated net charge at pH 2 rounded
    # up, but never below 1 so that every fragment yields at least one ion.
    def max_charge_for(sequence)
      charges = MS::ChargeCalculator.identify_potential_charges(sequence)
      [MS::ChargeCalculator.charge_at_pH(charges, 2).ceil, 1].max
    end

    # One TableEntry per fragment, ion type and charge state (1..@max_charge).
    def ion_entries(fragments, terminus_mass, ion_types)
      fragments.flat_map do |frag|
        base_mass = frag.chars.inject(terminus_mass) { |mass, residue| mass + residue_mass(residue) }
        ion_types.flat_map do |ion_type|
          (1..@max_charge).map do |charge|
            label = "#{ion_type}(#{frag.size})#{'+' * charge}".to_sym
            TableEntry.new(label, frag, charge_state(base_mass + IonTypeMassDelta[ion_type], charge), charge)
          end
        end
      end
    end

    # Residue mass from the selected table, with a readable error when missing.
    def residue_mass(residue)
      @mass_list.fetch(residue.to_sym) do
        raise ArgumentError, "unknown residue #{residue.inspect} in #{@sequence.inspect}"
      end
    end
  end
end
|
139
|
+
|
140
|
+
|
141
|
+
######### Testing stuff
|
142
|
+
# Command line entry point: fragments the sequence given as the first argument
# and graphs the resulting ions.
if $0 == __FILE__
  require 'optparse'
  options = {charge_states: true, avg: false}
  ion_options = {}
  parser = OptionParser.new do |opts|
    opts.banner = "Usage: #{File.basename(__FILE__)} sequence [options]"
    opts.separator "Output: [Array] (containing fragment ion masses)"

    opts.on('--ion_type a,b,c,x,y,z', Array, "Select ion types (default is b,y)") do |t|
      selected = t.map { |a| a.downcase.to_sym }
      # Set every series explicitly so the selection replaces the b,y default
      # and uses the keys the Fragmenter actually reads.
      [:a, :b, :c, :x, :y, :z].each { |series| ion_options[series] = selected.include?(series) }
    end
    opts.on('--[no-]charge_states', "Turn on or off the charge state output") do |s|
      options[:charge_states] = s
    end
    opts.on('-a', '--avg', "Use average masses to calculate ions instead of monoisotopic masses") do |a|
      options[:avg] = a
    end
    opts.on_tail('-h', '--help', "Show this message") do
      puts opts
      exit
    end
  end # OptionParser

  # Without any argument there is nothing to fragment: show the full usage.
  if ARGV.size == 0
    puts parser
    exit
  end
  parser.parse!

  if ARGV.size >= 1
    f = MS::Fragmenter.new(options, ion_options)
    f.fragment(ARGV.first)
    puts "I graphed these fragments and wrote them to #{f.graph} for you."
  end
end # $0 == __FILE__
|
184
|
+
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# Mass constants and m/z helpers shared by the fragmenter.
module MS
  # NOTE(review): this value matches the monoisotopic mass of a hydrogen atom
  # rather than a bare proton, and nothing in this file uses it - confirm intent.
  Proton = 1.00782503207

  # m/z of the intact peptide at the given charge, using monoisotopic masses.
  #
  # seq    - peptide sequence in one-letter code (any case).
  # charge - non-zero Integer charge state.
  #
  # Raises ArgumentError for a residue missing from MonoResidueMasses.
  def precursor_mass(seq, charge)
    neutral = seq.upcase.chars.inject(NTerm + CTerm) do |mass, residue|
      mass + MonoResidueMasses.fetch(residue.to_sym) do
        raise ArgumentError, "unknown residue #{residue.inspect} in #{seq.inspect}"
      end
    end
    charge_state(neutral, charge)
  end

  # Converts a mass into m/z for the given charge.
  #
  # One mass unit is added per positive charge and removed per negative
  # charge, then the result is divided by the magnitude of the charge, so the
  # returned m/z is positive for either polarity.
  #
  # Raises ArgumentError when charge is zero (m/z is undefined).
  def charge_state(mass, charge)
    raise ArgumentError, "charge must be non-zero" if charge.zero?

    magnitude = charge.abs
    adjusted = charge > 0 ? mass + magnitude : mass - magnitude
    adjusted / magnitude.to_f
  end

  # Mass offset added to a fragment's base mass for each ion type.
  # NOTE(review): a_not and b_not subtract both 17.02654 and 18.01056 while
  # y_not subtracts only 18.01056 - confirm this asymmetry is intended.
  IonTypeMassDelta = {
    a: (- 29.00273),
    a_star: (-(29.00273+17.02654)),
    a_not: (-(17.02654 + 29.00273+18.01056)),
    b: (-1.00782),
    b_star: ( - 1.00782 - 17.02654),
    b_not: (-17.02654 - 1.00782 - 18.01056),
    c: (16.01872),
    x: (27.99491 - 1.00782),
    y: (1.00782),
    y_star: (1.00782 - 17.02654),
    y_not: (1.00782 - 18.01056),
    z: (- 16.01872)
  }

  # Mass added for the N-terminus of a fragment.
  NTerm = 1.00782

  # Mass added for the C-terminus of a fragment.
  # NOTE(review): evaluates to about 17.0207; confirm against the intended
  # terminal group mass.
  CTerm = 27.99491 - 10.9742

  # Monoisotopic residue masses keyed by one-letter code.
  MonoResidueMasses = {
    :A => 71.037114,
    :R => 156.101111,
    :N => 114.042927,
    :D => 115.026943,
    :C => 103.009185,
    :E => 129.042593,
    :Q => 128.058578,
    :G => 57.021464,
    :H => 137.058912,
    :I => 113.084064,
    :L => 113.084064,
    :K => 128.094963,
    :M => 131.040485,
    :F => 147.068414,
    :P => 97.052764,
    :S => 87.032028,
    :T => 101.047679,
    :U => 150.95363,
    :W => 186.079313,
    :Y => 163.06332,
    :V => 99.068414,
    :* => 118.805716,
    :B => 172.048405,
    :X => 118.805716,
    :Z => 128.550585
  }

  # Average residue masses keyed by one-letter code.
  AvgResidueMasses = {
    :* => 118.88603,
    :A => 71.0779,
    :B => 172.1405,
    :C => 103.1429,
    :D => 115.0874,
    :E => 129.11398,
    :F => 147.17386,
    :G => 57.05132,
    :H => 137.13928,
    :I => 113.15764,
    :K => 128.17228,
    :L => 113.15764,
    :M => 131.19606,
    :N => 114.10264,
    :O => 211.28076,
    :P => 97.11518,
    :Q => 128.12922,
    :R => 156.18568,
    :S => 87.0773,
    :T => 101.10388,
    :U => 150.0379,
    :V => 99.13106,
    :W => 186.2099,
    :X => 118.88603,
    :Y => 163.17326,
    :Z => 128.6231
  }
end
|
94
|
+
################
|
95
|
+
=begin
Reference monoisotopic masses of the groups used in the constants above:

Formula: H3N1
Monoisotopic mass : 17.02654

Formula: C1H1O1
Monoisotopic mass : 29.00273

Formula: H2O1
Monoisotopic mass : 18.01056

Formula: H1
Monoisotopic mass : 1.00782

Formula: H2N1
Monoisotopic mass : 16.01872

Formula: C1O1
Monoisotopic mass : 27.99491
=end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks the number of m/z values returned by Fragmenter#fragment and the
# presence of one known value per scenario.
describe Fragmenter do
  before(:each) { @f = Fragmenter.new }

  it "generates an appropriate response" do
    masses = @f.fragment("REALPEPTIDE")
    masses.should be_a(Array)
    masses.size.should == 120
    masses.include?(157.101111).should be_true
  end

  it "handles a single charge state limitation" do
    single_charge = Fragmenter.new(:charge_states => false)
    masses = single_charge.fragment("RYANASTAFK")
    masses.size.should == 54
    masses.include?(982.466821).should be_true
  end

  it "calculates more ions for the acceptable charge states" do
    masses = @f.fragment("RYANASTAFK")
    masses.size.should == 162
    masses.include?(491.7334105).should be_true
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: MS-fragmenter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ryan Taylor
|
9
|
+
autorequire:
|
10
|
+
bindir:
|
11
|
+
- bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2012-12-10 00:00:00.000000000 Z
|
14
|
+
dependencies: []
|
15
|
+
description: A peptide sequence fragmenter which will handle graphing and mgf output,
|
16
|
+
as well as command line fragmentation with options
|
17
|
+
email: ryanmt@byu.net
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/fragmenter.rb
|
23
|
+
- lib/charge_calculator.rb
|
24
|
+
- lib/fragmenter/masses.rb
|
25
|
+
- spec/spec_helper.rb
|
26
|
+
- spec/fragmenter_spec.rb
|
27
|
+
homepage: https://github.com/ryanmt/fragmenter
|
28
|
+
licenses: []
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 1.8.24
|
48
|
+
signing_key:
|
49
|
+
specification_version: 3
|
50
|
+
summary: ryanmt's peptide fragmenter
|
51
|
+
test_files: []
|