ms-mascot 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2008, Regents of the University of Colorado.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
4
+ software and associated documentation files (the "Software"), to deal in the Software
5
+ without restriction, including without limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7
+ to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Mascot}[http://mspire.rubyforge.org/projects/ms-mascot]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting {Mascot}[http://www.matrixscience.com/].
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-mascot/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Mascot is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-mascot
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
22
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,123 @@
1
+ # = Usage
2
+ # tap generate_mgf {options} protein_sequences
3
+ #
4
+ # When specifying the ions to include, alternate charge states can be
5
+ # specified using + and -, for example 'y++' or 'b-'. The available ion
6
+ # series are [a,b,c,x,y,z].
7
+ #
8
+ # = Description
9
+ # Digests, fragments, then formats the protein sequences into mgf files.
10
+ # Use the options to specify/modify digestion enzymes, as well as the
11
+ # type of ions to generate.
12
+ #
13
+ # = Information
14
+ #
15
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
16
+ # Developer:: Simon Chiang, Biomolecular Structure Program
17
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
18
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
19
+ #
20
+
21
+ require 'tap/script'
22
+ include Constants::Library
23
+
24
app = Tap::App.instance

#
# Option declarations: the Digest and Mgf::Print configurations contribute
# their own options, and the script-specific switches are appended to them.
#

script_opts = [
  ['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
  ['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
  #['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
  ['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
  ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
  ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]
]

opts = Prospector::Digest.configurations.to_opts +
       Mascot::Formats::Mgf::Print.configurations.to_opts +
       script_opts

# Defaults, overridden by the options handled below.
digest_config = {}
print_config = {}
ion_spec = "yb"
parent_charge = 1
precision = 6

Tap::Script.handle_options(*opts) do |switch, arg|
  case switch
  when '--help'
    # The usage text is extracted from the header comments of this file.
    puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
    puts
    puts Tap::Script.usage_options(opts)
    exit

  when '--debug' then app.options.debug = true
  when '--ions' then ion_spec = arg
  when '--charge' then parent_charge = arg.to_i
  when '--residue_precision' then precision = arg.to_i

  else
    # Any other switch belongs to the digest and/or print configurations;
    # its value is parsed as YAML.
    digest_key = Prospector::Digest.configurations.opt_map(switch)
    digest_config[digest_key] = YAML.load(arg) if digest_key

    print_key = Mascot::Formats::Mgf::Print.configurations.opt_map(switch)
    print_config[print_key] = YAML.load(arg) if print_key
  end
end

if ARGV.empty?
  puts "no sequences specified"
  exit
end

# Split the ion string into individual series, each a single word
# character followed by any number of '-' and then '+' charge markers.
ion_series = ion_spec.scan(/\w\-*\+*/)

#loader = Prospector::LoadDigesters.new
#loader.enq(enzyme_file)

# digestion task
digest = Prospector::Digest.new(nil, digest_config)

# terminal groups used for every fragment spectrum
n = Molecule[digest.nterm]
c = Molecule[digest.cterm]

# fragmentation task: turns each digested polypeptide into an mgf entry
fragment = Tap::Task.new do |task, polypeptides|
  polypeptides.map do |polypeptide, _start_index, _end_index|
    sequence = polypeptide.sequence
    preview = sequence[0..10]
    preview += "..." if sequence.length > 10
    task.log :fragment, preview

    f = Mascot::FragmentSpectrum.new(polypeptide.sequence, n, c, precision)

    # m/z of the parent ion at the requested charge
    neutral_mass = f.mass(n) + f.ladder.last + f.mass(c)
    headers = {
      :title => polypeptide.sequence,
      :charge => parent_charge,
      :pepmass => (neutral_mass + parent_charge * f.proton_mass) / parent_charge
    }

    # all requested series merged into one sorted list of non-negative
    # masses, each paired with an intensity of 1
    masses = ion_series.map { |name| f.series(name) }.flatten.reject { |mz| mz < 0 }.sort
    data = masses.map { |mz| [mz, 1] }

    Mascot::Formats::Mgf::Entry.new(headers, data)
  end
end

# printing task
printer = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)

# workflow: digest -> fragment -> print
digest.enq(*ARGV)
ARGV.clear

app.sequence(digest, fragment, printer)
app.run
@@ -0,0 +1,123 @@
1
+ # = Usage
2
+ # tap generate_mgf {options} protein_sequences
3
+ #
4
+ # When specifying the ions to include, alternate charge states can be
5
+ # specified using + and -, for example 'y++' or 'b-'. The available ion
6
+ # series are [a,b,c,x,y,z].
7
+ #
8
+ # = Description
9
+ # Digests, fragments, then formats the protein sequences into mgf files.
10
+ # Use the options to specify/modify digestion enzymes, as well as the
11
+ # type of ions to generate.
12
+ #
13
+ # = Information
14
+ #
15
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
16
+ # Developer:: Simon Chiang, Biomolecular Structure Program
17
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
18
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
19
+ #
20
+
21
+ require 'tap/script'
22
+ include Constants::Library
23
+
24
app = Tap::App.instance

#
# Option declarations: the Digest and Mgf::Print configurations contribute
# their own options, and the script-specific switches are appended to them.
#

script_opts = [
  ['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
  ['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
  #['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
  ['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
  ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
  ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]
]

opts = Prospector::Digest.configurations.to_opts +
       Mascot::Formats::Mgf::Print.configurations.to_opts +
       script_opts

# Defaults, overridden by the options handled below.
digest_config = {}
print_config = {}
ion_spec = "yb"
parent_charge = 1
# NOTE(review): residue_precision is parsed but not used anywhere below.
residue_precision = 6

Tap::Script.handle_options(*opts) do |switch, arg|
  case switch
  when '--help'
    # The usage text is extracted from the header comments of this file.
    puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
    puts
    puts Tap::Script.usage_options(opts)
    exit

  when '--debug' then app.options.debug = true
  when '--ions' then ion_spec = arg
  when '--charge' then parent_charge = arg.to_i
  when '--residue_precision' then residue_precision = arg.to_i

  else
    # Any other switch belongs to the digest and/or print configurations;
    # its value is parsed as YAML.
    digest_key = Prospector::Digest.configurations.opt_map(switch)
    digest_config[digest_key] = YAML.load(arg) if digest_key

    print_key = Mascot::Formats::Mgf::Print.configurations.opt_map(switch)
    print_config[print_key] = YAML.load(arg) if print_key
  end
end

if ARGV.empty?
  puts "no sequences specified"
  exit
end

# Split the ion string into individual series, each a single word
# character followed by any number of '-' and then '+' charge markers.
ion_series = ion_spec.scan(/\w\-*\+*/)

#loader = Prospector::LoadDigesters.new
#loader.enq(enzyme_file)

# digestion task
digest = Prospector::Digest.new(nil, digest_config)

# terminal groups used for every fragment spectrum
n = Molecule[digest.nterm]
c = Molecule[digest.cterm]

# fragmentation task: turns each digested polypeptide into an mgf entry
fragment = Tap::Task.new do |task, polypeptides|
  polypeptides.map do |polypeptide, _start_index, _end_index|
    sequence = polypeptide.sequence
    preview = sequence[0..10]
    preview += "..." if sequence.length > 10
    task.log :fragment, preview

    f = Prospector::FragmentSpectrum.new(polypeptide.sequence, n, c)

    # m/z of the parent ion at the requested charge; each charge adds a
    # hydrogen less an electron
    neutral_mass = n.mass + polypeptide.mass + c.mass
    charge_carrier = Molecule['H'].mass - Particle['Electron'].mass
    headers = {
      :title => polypeptide.sequence,
      :charge => parent_charge,
      :pepmass => (neutral_mass + parent_charge * charge_carrier) / parent_charge
    }

    # all requested series merged into one sorted list of non-negative
    # masses, each paired with an intensity of 1
    masses = ion_series.map { |name| f.series(name) }.flatten.reject { |mz| mz < 0 }.sort
    data = masses.map { |mz| [mz, 1] }

    Mascot::Formats::Mgf::Entry.new(headers, data)
  end
end

# printing task
printer = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)

# workflow: digest -> fragment -> print
digest.enq(*ARGV)
ARGV.clear

app.sequence(digest, fragment, printer)
app.run
@@ -0,0 +1,90 @@
1
+ # = Usage
2
+ # tap reformat_mgf {options} MGF_FILES
3
+ #
4
+ # = Description
5
+ # Reformats mgf files to a standard output like:
6
+ #
7
+ # BEGIN IONS
8
+ # TITLE=7100401blank.190.190.2.dta
9
+ # CHARGE=2+
10
+ # PEPMASS=321.571138
11
+ # 100.266 2.0
12
+ # 111.323 2.5
13
+ # ...
14
+ # 496.110 3.3
15
+ # 601.206 1.3
16
+ # END IONS
17
+ #
18
+ # = Information
19
+ #
20
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
21
+ # Developer:: Simon Chiang, Biomolecular Structure Program
22
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
23
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
24
+ #
25
+ require 'tap/script'
26
+
27
app = Tap::App.instance

#
# handle options
#

opts = [
  ['--target_dir', '-t', GetoptLong::REQUIRED_ARGUMENT, "Specify an output directory."],
  ['--mz_precision', '-m', GetoptLong::REQUIRED_ARGUMENT, "Specify the mz precision."],
  ['--intensity_precision', '-i', GetoptLong::REQUIRED_ARGUMENT, "Specify the intensity precision."],
  ['--pepmass_precision', '-p', GetoptLong::REQUIRED_ARGUMENT, "Specify the peptide mass precision."],
  ['--headers', nil, GetoptLong::REQUIRED_ARGUMENT, "Specify the headers to include, separated by commas."],
  ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
  ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifies debug mode."]]

# Options are collected into the config handed to the reformat task; files
# are written under 'reformatted' unless --target_dir is given.
config = {:target_dir => 'reformatted'}

Tap::Script.handle_options(*opts) do |opt, value|
  case opt
  when '--help'
    # The usage text is extracted from the header comments of this file.
    puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
    puts
    puts Tap::Script.usage_options(opts)
    exit

  when '--debug'
    app.options.debug = true

  when '--headers'
    # Strip one pair of enclosing double quotes.  One-character substrings
    # are compared (rather than value[0] == 34) because String#[] with a
    # single index returns a character code only on Ruby 1.8; on 1.9+ it
    # returns a String, so the integer comparison would never match.
    value = value[1..-2] if value[0, 1] == '"' && value[-1, 1] == '"'
    config[:headers] = value.split(/,/).collect {|header| header.strip}

  else
    # Every remaining option maps directly onto the config key of the same
    # name, e.g. --mz_precision => :mz_precision.
    opt =~ /--(.*)/
    config[$1.to_sym] = value
  end
end

#
# reformat task
#

# NOTE(review): this script loads 'mascot/formats/mgf' and calls
# Mascot::Formats::Mgf::Archive and entry#puts; confirm that library is
# available wherever this script is run.
require 'mascot/formats/mgf'

# Rewrites each input mgf file, entry by entry, to a file of the same
# basename under the configured target directory.
reformat = Tap::FileTask.new("", config) do |task, input|
  target = task.filepath(task.config[:target_dir], File.basename(input))
  task.prepare(target)

  task.log_basename :reformatting, input
  Mascot::Formats::Mgf::Archive.open(input) do |archive|
    # build the entry index if the archive does not have one yet
    archive.reindex if archive.length == 0

    File.open(target, "wb") do |output|
      archive.each do |mgf|
        mgf.puts(output, task.config)
        output.puts   # blank line between entries
      end
    end
  end
end

# Hand the input files to the task, clearing ARGV first.
args = ARGV.dup
ARGV.clear
app.run(reformat, *args)
@@ -0,0 +1,5 @@
1
module Ms
  module Mascot
    # Mass uncertainty constant; presumably the tolerance used when
    # comparing fragment masses in tests (not referenced elsewhere in the
    # visible sources -- confirm against the test suite).
    #
    # Written as a Float literal: the former 10**-2 evaluates to the
    # Rational (1/100) on Ruby >= 1.9, and to the Float 0.01 only on 1.8.
    FRAGMENT_TEST_MASS_UNCERTAINTY = 1e-2
  end
end
@@ -0,0 +1,20 @@
1
+ require 'tap/http/dispatch'
2
+
3
module Ms
  module Mascot
    # :startdoc::manifest exports results from a search
    # UNDER CONSTRUCTION
    class Export < Tap::Http::Dispatch

      # Builds one request hash per Mascot result file, merging the file
      # into the configured params under the "file" key, and passes the
      # requests on to the superclass implementation.
      def process(*mascot_files)
        requests = mascot_files.map do |path|
          request_params = params.merge("file" => path)
          {:params => request_params}
        end

        super(*requests)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'ms/in_silico/fragment'
2
+ require 'ms/mascot/spectrum'
3
+
4
module Ms
  module Mascot

    # Ms::Mascot::Fragment::manifest calculates a theoretical Mascot ms/ms spectrum
    #
    # Calculates the parent ion mass and theoretical ms/ms spectrum for a peptide
    # sequence.  Configurations allow the specification of one or more
    # fragmentation series to include, as well as charge, and intensity.
    #
    #   % rap fragment TVQQEL --+ dump --no-audit
    #   # date: 2008-09-15 14:37:55
    #   ---
    #   ms/mascot/fragment (:...:):
    #   - - 717.3777467
    #     - - 102.054955
    #       - 132.1019047
    #       - 201.123369
    #       - 261.1444977
    #       - 329.181947
    #       - 389.2030757
    #       - 457.240525
    #       - 517.2616537
    #       - 586.283118
    #       - 616.3300677
    #
    # In the output, the parent ion mass is given first, followed by an array of
    # the sorted fragmentation data.
    class Fragment < InSilico::Fragment

      # Returns a new Mascot::Spectrum for the peptide, built with the
      # nterm and cterm of this task.
      def spectrum(peptide)
        spectrum_class = Mascot::Spectrum
        spectrum_class.new(peptide, nterm, cterm)
      end

    end
  end
end
@@ -0,0 +1,2 @@
1
# Loads the mgf entry and archive classes.  The paths follow the layout of
# this gem (lib/ms/mascot/mgf/*.rb), matching the requires used by the
# archive and predict files.
require 'ms/mascot/mgf/entry'
require 'ms/mascot/mgf/archive'
@@ -0,0 +1,24 @@
1
+ require 'external'
2
+ require 'ms/mascot/mgf/entry'
3
+
4
module Ms
  module Mascot
    module Mgf

      # An ExternalArchive over an mgf file, giving array-like access to
      # the entries it contains.
      class Archive < ExternalArchive

        # Rebuilds the index of self so that each entry starts at a
        # "BEGIN IONS" separator.  The block, if given, is forwarded to
        # reindex_by_sep.
        def reindex(&block)
          separator_options = {:entry_follows_sep => true}
          reindex_by_sep("BEGIN IONS", separator_options, &block)
        end

        # Converts the string for a single entry into an Mgf::Entry by
        # parsing it.
        def str_to_entry(str)
          Entry.parse(str)
        end

      end
    end
  end
end
@@ -0,0 +1,162 @@
1
module Ms
  module Mascot
    module Mgf

      # Represents a mascot generic file (mgf) formatted entry.
      #
      #   BEGIN IONS
      #   TITLE=7100401blank.190.190.2.dta
      #   CHARGE=2+
      #   PEPMASS=321.571138
      #   100.266 2.0
      #   111.323 2.5
      #   ...
      #   496.110 3.3
      #   601.206 1.3
      #   END IONS
      #
      class Entry
        class << self

          # Parses the entry string into an Mgf::Entry.  The entry must be
          # complete and properly formatted, ie it must begin with a
          # 'BEGIN IONS' line and end with an 'END IONS' line; otherwise an
          # ArgumentError is raised.  Lines of the form KEY=VALUE are set
          # as headers (see []=); every other line is split on whitespace
          # into at most two floats and appended to the data.
          def parse(str)
            entry = Entry.new

            lines = str.strip.split(/\s*\r?\n\s*/)

            unless lines.shift == "BEGIN IONS"
              raise ArgumentError, "input should begin with 'BEGIN IONS'"
            end

            unless lines.pop == "END IONS"
              raise ArgumentError, "input should end with 'END IONS'"
            end

            lines.each do |line|
              if line =~ /^(.*?)=(.*)$/
                entry[$1] = $2
              else
                entry.data << line.split(/\s+/, 2).collect {|i| i.to_f }
              end
            end

            entry
          end
        end

        # A hash of mgf headers, not including CHARGE and PEPMASS
        attr_reader :headers

        # The charge of the entry
        attr_accessor :charge

        # The peptide mass of the entry
        attr_accessor :pepmass

        # The data (mz/intensity) for the entry
        attr_accessor :data

        # Initializes a new Entry using the headers and data.  Set charge
        # and pepmass using the CHARGE and PEPMASS headers.
        def initialize(headers={}, data=[])
          @headers = {}
          @pepmass = nil
          @charge = nil
          @data = data

          headers.each_pair do |key, value|
            self[key] = value
          end
        end

        # Retrieve a header using an mgf header string.  CHARGE and PEPMASS
        # headers can be retrieved using [], and will reflect the current
        # values of charge and pepmass.  Keys are stringified and upcased.
        def [](key)
          key = key.to_s.upcase
          case key
          when "PEPMASS" then pepmass.to_s
          when "CHARGE"  then charge_to_s
          else headers[key]
          end
        end

        # Set a header using an mgf header string.  CHARGE and PEPMASS
        # headers may be set using []=, and will modify the current values
        # of charge and pepmass.  Keys are stringified and upcased.
        #
        # PEPMASS values are converted with to_f.  CHARGE must be an
        # integer or a string formatted like '1+' or '1-'; anything else
        # raises a RuntimeError.
        def []=(key, value)
          key = key.to_s.upcase
          case key
          when "PEPMASS"
            self.pepmass = value.to_f
          when "CHARGE"
            value = case value
            # Integer rather than Fixnum: Fixnum no longer exists on
            # Ruby >= 3.2, and every former Fixnum is an Integer.
            when Integer then value
            when /^(\d+)([+-])$/ then $1.to_i * ($2 == "+" ? 1 : -1)
            else raise "charge should be an number, or a string formatted like '1+' or '1-'"
            end

            self.charge = value
          else
            headers[key] = value
          end
        end

        # Formats and puts self to the target, which must respond to <<
        # (a new string by default).  Returns the target.  Use the options
        # to modify the output:
        #
        # headers::             an array of headers to include (by default
        #                       all headers will be included; pepmass and
        #                       charge will always be included)
        # pepmass_precision::   integer value specifying precision of pepmass
        # mz_precision::        integer value specifying precision of mz values
        # intensity_precision:: integer value specifying precision of intensity
        #                       values
        #
        # A nil precision formats the value with to_s.
        def dump(target="".dup, options={})
          options = {
            :mz_precision => nil,
            :intensity_precision => nil,
            :pepmass_precision => nil,
            :headers => nil
          }.merge(options)

          target << "BEGIN IONS\n"
          (options[:headers] || headers.keys).each do |key|
            # headers are stored under stringified, upcased keys, so the
            # requested key is normalized the same way before lookup
            key = key.to_s.upcase

            # CHARGE and PEPMASS are not kept in headers and are always
            # written below; skipping them here avoids an empty duplicate
            next if key == "CHARGE" || key == "PEPMASS"

            target << "#{key}=#{headers[key]}\n"
          end

          target << "CHARGE=#{charge_to_s}\n"
          target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass

          data_format = "#{format options[:mz_precision]} #{format options[:intensity_precision]}\n"
          data.each do |data_point|
            target << (data_format % data_point)
          end

          target << "END IONS\n"
          target
        end

        # Returns self formatted as a string
        def to_s
          dump
        end

        private

        # formats the charge as a string, like '2+' or '1-' (empty when
        # charge is nil)
        def charge_to_s # :nodoc:
          charge == nil ? "" : "#{charge.abs}#{charge > 0 ? '+' : '-'}"
        end

        # returns a format string for the specified precision
        def format(precision) # :nodoc:
          precision == nil ? "%s" : "%.#{precision}f"
        end

      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ require 'ms/in_silico/digest'
2
+ require 'ms/mascot/fragment'
3
+ require 'ms/mascot/mgf/archive'
4
+
5
module Ms
  module Mascot

    # Ms::Mascot::Predict::manifest predicts the spectra for a protein sequence
    #
    # Fragments a protein sequence and calculates the fragment spectra for
    # each peptide.  The peptide spectra are formatted as mgf and dumped to
    # the target.
    #
    #   % rap predict MAEELVLERCDLELETNGRDHHTADLCREKLVVRRGQPFWLTLHFEGRNYEASVDSLTFS
    #     I[16:30:19]             digest MAEELVLERCD... to 15 peptides
    #     I[16:30:19]           fragment MAEELVLER
    #     I[16:30:19]           fragment MAEELVLERCDLELETNGR
    #     I[16:30:19]           fragment CDLELETNGR
    #     I[16:30:19]           fragment CDLELETNGRDHHTADLCR
    #     I[16:30:19]           fragment DHHTADLCR
    #     I[16:30:19]           fragment DHHTADLCREK
    #     I[16:30:19]           fragment EKLVVR
    #     I[16:30:19]           fragment LVVR
    #     I[16:30:19]           fragment LVVRR
    #     I[16:30:19]           fragment RGQPFWLTLHFEGR
    #     I[16:30:19]           fragment GQPFWLTLHFEGR
    #     I[16:30:19]           fragment GQPFWLTLHFEGRNYEASVDSLTFS
    #     I[16:30:19]           fragment NYEASVDSLTFS
    #
    class Predict < Tap::FileTask
      define :digest, InSilico::Digest, {:max_misses => 1}
      define :fragment, Mascot::Fragment, {:intensity => 1, :unmask => true, :sort => true}

      config :headers, nil, &c.hash_or_nil           # a hash of headers to include
      config :min_length, 3, &c.integer_or_nil       # the minimum peptide length
      config :mz_precision, 6, &c.integer            # the precision of mzs
      config :intensity_precision, 0, &c.integer     # the precision of intensities
      config :pepmass_precision, 6, &c.integer       # the precision of peptide mass

      # Wires digest to fragment.  Each digest result at least min_length
      # long (every result when min_length is nil) is executed by fragment;
      # each fragment result with a non-empty spectrum is appended to the
      # internal entries collection as a new mgf entry.
      def workflow
        digest.on_complete do |_peptides|
          _peptides._iterate.each do |_peptide|
            too_short = min_length && _peptide._current.length < min_length
            fragment._execute(_peptide) unless too_short
          end
        end

        fragment.on_complete do |_spectrum|
          parent_ion_mass, data = _spectrum._current

          unless data.empty?
            # the second-to-last audited value is taken as the peptide
            peptide = _spectrum._values[-2]
            entry_headers = {
              'TITLE' => "#{peptide} (#{fragment.series.join(', ')})",
              'CHARGE' => fragment.charge,
              'PEPMASS' => parent_ion_mass}

            @entries << Mgf::Entry.new(entry_headers, data)
          end
        end
      end

      # Infers a default path for the output mgf file from the sequence;
      # the path is "<sequence>.mgf" if the sequence is 10 characters or
      # fewer, otherwise it's like: "<first five>_<last five>.mgf"
      #
      def default_path(sequence)
        basename = sequence.length > 10 ? "#{sequence[0,5]}_#{sequence[-5,5]}" : sequence
        "#{basename}.mgf"
      end

      # Strips all whitespace from the sequence, digests it (collecting the
      # mgf entries built by the workflow), and dumps the entries, each
      # followed by a blank line, to the target.  The target defaults to
      # default_path(sequence) when nil.  Returns the target.
      def process(sequence, target=nil)
        sequence = sequence.gsub(/\s/, "")

        @entries = []
        digest.execute(sequence)

        # prepare and dump the predicted spectra
        # to the target path.
        target = default_path(sequence) if target.nil?
        prepare(target)
        File.open(target, "wb") do |io|
          @entries.each do |mgf_entry|
            mgf_entry.dump(io, config)
            io.puts
          end
        end

        target
      end
    end
  end
end
@@ -0,0 +1,188 @@
1
+ require 'ms/in_silico/spectrum'
2
+
3
module Ms
  module Mascot

    # Generates a Mascot-style theoretical spectrum.  When the masses are
    # set correctly, the theoretical spectrum will have zero error and
    # full coverage (for whatever series are generated) when identified
    # using Mascot.
    #
    # === Peptide Mass Error
    #
    # The peptide mass calculated by Spectrum is inexact with respect to
    # Mascot.  Mascot uses some unknown algorithm to speed up its
    # calculation and introduces some rounding/truncation error somewhere
    # along the line.  For instance, if you calculate the mass of a peptide
    # by directly using the Unimod masses, it will NOT be the mass used
    # by Mascot.  For example:
    #
    #   def molecule_mass(c, h, n, o, s)
    #     c * 12 + h * 1.007825035 + n * 14.003074 + o * 15.99491463 + s * 31.9720707
    #   end
    #
    #   # Formula for MFSFVDLR: C(47)H(69)N(11)O(11)S(1)
    #   # Formula for water:    C(0) H(2) N(0) O(1) S(0)
    #
    #   molecule_mass(47, 69, 11, 11, 1) + molecule_mass(0, 2, 0, 1, 0)
    #   # => 1013.500437745
    #
    # Now by comparison:
    #
    #   mascot:   1013.500443
    #   unimod:   1013.500437745
    #   delta:       0.000005255
    #
    # Similar or worse errors are typical and cannot be eliminated by
    # any known permutation (calculating from the residue masses,
    # rounding etc).  See http://gist.github.com/31241 for tasks that
    # perform the calculation using various permutations.
    #
    # One helpful note if you try to break the code: you can set the
    # number of sig figs to 6 in mascot.dat (MassDecimalPlaces) and
    # read the exact peptide mass numbers directly from a result page.
    #
    # Spectrum calculates peptide mass using the masses in mass_map,
    # ie the rounded residue masses.
    class Spectrum < InSilico::Spectrum
      Element = Constants::Libraries::Element

      # A map of the default [monoisotopic, average] masses for a variety
      # of constants used by Mascot.  The element masses can be traced
      # back to {Unimod}[http://www.unimod.org/masses.html] and the
      # residues calculated by using the Unimod masses, then rounding.
      #
      #--
      # Taken from the configuration pages on the Hansen Lab server:
      #
      # - http://hsc-mascot.uchsc.edu/mascot/x-cgi/ms-config.exe?u=1222975681&ELEMENTS_SHOW=1
      # - http://hsc-mascot.uchsc.edu/mascot/x-cgi/ms-config.exe?u=1222975681&AMINOACIDS_SHOW=1
      #
      DEFAULT_MASS_MAP = {}
      DEFAULT_MASS_MAP.merge!(
        Residue::A => [71.037114, 71.0779],
        Residue::R => [156.101111, 156.1857],
        Residue::N => [114.042927, 114.1026],
        Residue::D => [115.026943, 115.0874],
        Residue::C => [103.009185, 103.1429],
        Residue::E => [129.042593, 129.1140],
        Residue::Q => [128.058578, 128.1292],
        Residue::G => [57.021464, 57.0513],
        Residue::H => [137.058912, 137.1393],
        Residue::I => [113.084064, 113.1576],
        Residue::L => [113.084064, 113.1576],
        Residue::K => [128.094963, 128.1723],
        Residue::M => [131.040485, 131.1961],
        Residue::F => [147.068414, 147.1739],
        Residue::P => [97.052764, 97.1152],
        Residue::S => [87.032028, 87.0773],
        Residue::T => [101.047679, 101.1039],
        Residue::W => [186.079313, 186.2099],
        Residue::Y => [163.063329, 163.1733],
        Residue::V => [99.068414, 99.1311],

        Element::Ag => [106.905092, 107.8682],
        Element::Au => [196.966543, 196.96655],
        Element::Br => [78.9183361, 79.904],
        Element::C => [12, 12.0107],
        Element::Ca => [39.9625906, 40.078],
        Element::Cl => [34.96885272, 35.453],
        Element::Cu => [62.9295989, 63.546],
        Element::F => [18.99840322, 18.9984032],
        Element::Fe => [55.9349393, 55.845],
        Element::H => [1.007825035, 1.00794],
        Element::Hg => [201.970617, 200.59],
        Element::I => [126.904473, 126.90447],
        Element::K => [38.9637074, 39.0983],
        Element::Li => [7.016003, 6.941],
        Element::Mo => [97.9054073, 95.94],
        Element::N => [14.003074, 14.0067],
        Element::Na => [22.9897677, 22.98977],
        Element::Ni => [57.9353462, 58.6934],
        Element::O => [15.99491463, 15.9994],
        Element::P => [30.973762, 30.973761],
        Element::S => [31.9720707, 32.065],
        Element::Se => [79.9165196, 78.96],
        Element::Zn => [63.9291448, 65.409],

        #'13C' => [13.00335483, 13.00335483],
        #'15N' => [15.00010897, 15.00010897],
        #'18O' => [17.9991603, 17.9991603],
        #'2H' => [2.014101779, 2.014101779],

        HYDROGEN => [1.007825, 1.0079],
        HYDROXIDE => [17.002740, 17.0073],
        ELECTRON => [0.000549, 0.000549]
      )

      # Mark prolines to be located by the spectrum,
      # so they may be masked later.
      locate_residues "P"

      # A hash of masses to use in place of the Element/Molecule
      # masses normally used in calculating a spectrum.  By
      # default mass map contains the monoisotopic masses
      # specified in DEFAULT_MASS_MAP.
      #
      # Note: to generate a zero-error spectrum for Mascot, it
      # is important that mass_map contains the exact masses
      # used by the server.  If your server uses non-default
      # masses, override the values in DEFAULT_MASS_MAP to
      # affect all instances, or just mass_map to affect a
      # single instance.  See:
      #
      #   http://your.mascot.server/x-cgi/ms-config.exe
      #
      # To check the mass values for your server.
      attr_reader :mass_map

      # Initializes a new Spectrum for the sequence.  The mass map is
      # seeded with the monoisotopic mass of every DEFAULT_MASS_MAP entry
      # before calling super, which is given a block that looks element
      # masses up in the mass map.  Afterwards the last location of the
      # a, b, c and cladder series and the first location of the
      # x, y, Y, z and nladder series are masked.
      def initialize(sequence, nterm=HYDROGEN, cterm=HYDROXIDE)
        @mass_map = {}
        DEFAULT_MASS_MAP.each_pair do |const, (mono, _avg)|
          @mass_map[const] = mono
        end

        super do |element|
          @mass_map[element]
        end

        [:a, :b, :c, :cladder].each {|key| mask_locations key, [-1] }
        [:x, :y, :Y, :z, :nladder].each {|key| mask_locations key, [0] }

        # # mask prolines
        # mask_locations :c, residue_locations['P'].collect {|i| i-1}
        # mask_locations :z, residue_locations['P']
      end

      protected

      # looks up a mapped mass from mass_map or reverts to super
      def mass(molecule) # :nodoc:
        @mass_map[molecule] || super
      end

      # handle_unknown_series maps several Mascot-specific
      # series annotations to their standard counterparts:
      #
      #   series+n    series + Hn     (ex 'z+1', 'z+2')
      #   series*     series - NH3    (ex 'b*', 'y*++')
      #   series0     series - H2O    (ex 'b0', 'y0++')
      #   Immon.      immonium
      #
      # Anything else is passed on to super.
      def handle_unknown_series(s)
        case s
        # The plus before the hydrogen count is a literal.  Written as a
        # quantifier, as it used to be, this pattern also matched plain
        # 'y0' and 'b0' (as 'y +H0'), shadowing the -H2O case below.
        when /^([\w\+\-]+)\+(\d+)$/
          self.series("#{$1} +H#{$2.to_i}")
        when /^(\w+)\*(\+*-*)$/
          self.series("#{$1}#{$2} -NH3")
        when /^(\w+)0(\+*-*)$/
          self.series("#{$1}#{$2} -H2O")
        when /^Immon\.(.*)$/
          self.series("immonium#{$1}")
        else
          super
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'tap/http/dispatch'
2
+
3
module Ms
  module Mascot
    # :startdoc::manifest submits an mgf file
    # UNDER CONSTRUCTION
    class Submit < Tap::Http::Dispatch
      RESULT_REGEXP = /<A HREF="..\/cgi\/master_results.pl\?file=(.*?\.dat)">/im
      ERROR_REGEXP = /<BR>The following error has occured getting your search details:<BR>(.*?)<BR>/im
      MISTAKE_REGEXP = /<BR>Sorry, your search could not be performed due to the following mistake entering data.<BR>(.*?)<BR>/im

      # Builds one request hash per mgf file, merging a file-upload hash
      # (content type plus filename) into the configured params under the
      # "FILE" key, and passes the requests on to the superclass
      # implementation.
      def process(*mgf_files)
        requests = mgf_files.map do |path|
          upload = {'Content-Type' => 'application/octet-stream', 'Filename' => path}
          {:params => params.merge("FILE" => upload)}
        end

        super(*requests)
      end

      # Hook for processing a response.  When the body links to a results
      # page, the response message is logged with the linked .dat file and
      # that file is returned.  Otherwise a ResponseError is raised,
      # carrying the error or mistake text reported in the body, or the
      # whole body when neither is recognized.
      def process_response(res)
        case res.body
        when RESULT_REGEXP
          result_file = Regexp.last_match(1)
          log(res.message, result_file)
          result_file

        when ERROR_REGEXP
          raise ResponseError, "error: #{Regexp.last_match(1).strip}"
        when MISTAKE_REGEXP
          raise ResponseError, "mistake: #{Regexp.last_match(1).strip}"
        else
          raise ResponseError, "unknown error:\n#{res.body}"
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-mascot
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-03 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.11"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tap-http
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.0
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: external
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.3.0
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: mspire
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.5.0
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: ms-in_silico
57
+ type: :runtime
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.2.0
64
+ version:
65
+ - !ruby/object:Gem::Dependency
66
+ name: ms-testdata
67
+ type: :development
68
+ version_requirement:
69
+ version_requirements: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 0.0.1
74
+ version:
75
+ description:
76
+ email: simon.a.chiang@gmail.com
77
+ executables: []
78
+
79
+ extensions: []
80
+
81
+ extra_rdoc_files:
82
+ - README
83
+ - MIT-LICENSE
84
+ files:
85
+ - cmd/generate_mgf.rb
86
+ - cmd/generate_prospector_mgf.rb
87
+ - cmd/reformat_mgf.rb
88
+ - lib/ms/mascot.rb
89
+ - lib/ms/mascot/export.rb
90
+ - lib/ms/mascot/fragment.rb
91
+ - lib/ms/mascot/mgf.rb
92
+ - lib/ms/mascot/mgf/archive.rb
93
+ - lib/ms/mascot/mgf/entry.rb
94
+ - lib/ms/mascot/predict.rb
95
+ - lib/ms/mascot/spectrum.rb
96
+ - lib/ms/mascot/submit.rb
97
+ - README
98
+ - MIT-LICENSE
99
+ has_rdoc: true
100
+ homepage: http://mspire.rubyforge.org/projects/ms-mascot/
101
+ post_install_message:
102
+ rdoc_options: []
103
+
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: "0"
111
+ version:
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: "0"
117
+ version:
118
+ requirements: []
119
+
120
+ rubyforge_project: mspire
121
+ rubygems_version: 1.3.1
122
+ signing_key:
123
+ specification_version: 2
124
+ summary: An Mspire library supporting Mascot.
125
+ test_files: []
126
+