ms-mascot 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2008, Regents of the University of Colorado.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
4
+ software and associated documentation files (the "Software"), to deal in the Software
5
+ without restriction, including without limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7
+ to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Mascot}[http://mspire.rubyforge.org/projects/ms-mascot]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting {Mascot}[http://www.matrixscience.com/].
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-mascot/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Mascot is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-mascot
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
22
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,123 @@
1
+ # = Usage
2
+ # tap generate_mgf {options} protein_sequences
3
+ #
4
+ # When specifying the ions to include, alternate charge states can be
5
+ # specified using + and -, for example 'y++' or 'b-'. The available ion
6
+ # series are [a,b,c,x,y,z].
7
+ #
8
+ # = Description
9
+ # Digests, fragments, then formats the protein sequences into mgf files.
10
+ # Use the options to specify/modify digestion enzymes, as well as the
11
+ # type of ions to generate.
12
+ #
13
+ # = Information
14
+ #
15
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
16
+ # Developer:: Simon Chiang, Biomolecular Structure Program
17
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
18
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
19
+ #
20
+
21
+ require 'tap/script'
22
+ include Constants::Library
23
+
24
+ app = Tap::App.instance
25
+
26
+ #
27
+ # handle options
28
+ #
29
+
30
+ opts = Prospector::Digest.configurations.to_opts
31
+ opts += Mascot::Formats::Mgf::Print.configurations.to_opts
32
+ opts += [
33
+ ['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
34
+
35
+ ['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
36
+ #['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
37
+ ['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
38
+ ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
39
+ ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]]
40
+
41
+ digest_config = {}
42
+ print_config = {}
43
+ series = "yb"
44
+ charge = 1
45
+ residue_precision = 6
46
+
47
+ Tap::Script.handle_options(*opts) do |opt, value|
48
+ case opt
49
+ when '--help'
50
+ puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
51
+ puts
52
+ puts Tap::Script.usage_options(opts)
53
+ exit
54
+
55
+ when '--debug'
56
+ app.options.debug = true
57
+
58
+ when '--ions'
59
+ series = value
60
+
61
+ when '--charge'
62
+ charge = value.to_i
63
+
64
+ when '--residue_precision'
65
+ residue_precision = value.to_i
66
+
67
+ else
68
+ key = Prospector::Digest.configurations.opt_map(opt)
69
+ digest_config[key] = YAML.load(value) if key
70
+
71
+ key = Mascot::Formats::Mgf::Print.configurations.opt_map(opt)
72
+ print_config[key] = YAML.load(value) if key
73
+ end
74
+ end
75
+
76
+ if ARGV.empty?
77
+ puts "no sequences specified"
78
+ exit
79
+ end
80
+
81
+ #
82
+ # add your script code here
83
+ #
84
+ series = series.scan(/\w\-*\+*/)
85
+
86
+ #loader = Prospector::LoadDigesters.new
87
+ #loader.enq(enzyme_file)
88
+
89
+ #
90
+ digest = Prospector::Digest.new(nil, digest_config)
91
+
92
+ #
93
+ n = Molecule[digest.nterm]
94
+ c = Molecule[digest.cterm]
95
+
96
+ fragment = Tap::Task.new do |task, polypeptides|
97
+ polypeptides.collect do |polypeptide, start_index, end_index|
98
+ task.log :fragment, polypeptide.sequence[0..10] + (polypeptide.sequence.length > 10 ? "..." : "")
99
+
100
+ f = Mascot::FragmentSpectrum.new(polypeptide.sequence, n, c, residue_precision)
101
+
102
+ headers = {
103
+ :title => polypeptide.sequence,
104
+ :charge => charge,
105
+ :pepmass => (f.mass(n) + f.ladder.last + f.mass(c) + charge * f.proton_mass)/charge
106
+ }
107
+
108
+ data = series.collect {|s| f.series(s)}.flatten.delete_if {|mass| mass < 0 }.sort
109
+ data = [data, Array.new(data.length, 1)].transpose
110
+
111
+ Mascot::Formats::Mgf::Entry.new(headers, data)
112
+ end
113
+ end
114
+
115
+ #
116
+ print = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)
117
+
118
+ # workflow
119
+ digest.enq(*ARGV)
120
+ ARGV.clear
121
+
122
+ app.sequence(digest, fragment, print)
123
+ app.run
@@ -0,0 +1,123 @@
1
+ # = Usage
2
+ # tap generate_mgf {options} protein_sequences
3
+ #
4
+ # When specifying the ions to include, alternate charge states can be
5
+ # specified using + and -, for example 'y++' or 'b-'. The available ion
6
+ # series are [a,b,c,x,y,z].
7
+ #
8
+ # = Description
9
+ # Digests, fragments, then formats the protein sequences into mgf files.
10
+ # Use the options to specify/modify digestion enzymes, as well as the
11
+ # type of ions to generate.
12
+ #
13
+ # = Information
14
+ #
15
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
16
+ # Developer:: Simon Chiang, Biomolecular Structure Program
17
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
18
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
19
+ #
20
+
21
+ require 'tap/script'
22
+ include Constants::Library
23
+
24
+ app = Tap::App.instance
25
+
26
+ #
27
+ # handle options
28
+ #
29
+
30
+ opts = Prospector::Digest.configurations.to_opts
31
+ opts += Mascot::Formats::Mgf::Print.configurations.to_opts
32
+ opts += [
33
+ ['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
34
+
35
+ ['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
36
+ #['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
37
+ ['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
38
+ ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
39
+ ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]]
40
+
41
+ digest_config = {}
42
+ print_config = {}
43
+ series = "yb"
44
+ charge = 1
45
+ residue_precision = 6
46
+
47
+ Tap::Script.handle_options(*opts) do |opt, value|
48
+ case opt
49
+ when '--help'
50
+ puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
51
+ puts
52
+ puts Tap::Script.usage_options(opts)
53
+ exit
54
+
55
+ when '--debug'
56
+ app.options.debug = true
57
+
58
+ when '--ions'
59
+ series = value
60
+
61
+ when '--charge'
62
+ charge = value.to_i
63
+
64
+ when '--residue_precision'
65
+ residue_precision = value.to_i
66
+
67
+ else
68
+ key = Prospector::Digest.configurations.opt_map(opt)
69
+ digest_config[key] = YAML.load(value) if key
70
+
71
+ key = Mascot::Formats::Mgf::Print.configurations.opt_map(opt)
72
+ print_config[key] = YAML.load(value) if key
73
+ end
74
+ end
75
+
76
+ if ARGV.empty?
77
+ puts "no sequences specified"
78
+ exit
79
+ end
80
+
81
+ #
82
+ # add your script code here
83
+ #
84
+ series = series.scan(/\w\-*\+*/)
85
+
86
+ #loader = Prospector::LoadDigesters.new
87
+ #loader.enq(enzyme_file)
88
+
89
+ #
90
+ digest = Prospector::Digest.new(nil, digest_config)
91
+
92
+ #
93
+ n = Molecule[digest.nterm]
94
+ c = Molecule[digest.cterm]
95
+
96
+ fragment = Tap::Task.new do |task, polypeptides|
97
+ polypeptides.collect do |polypeptide, start_index, end_index|
98
+ task.log :fragment, polypeptide.sequence[0..10] + (polypeptide.sequence.length > 10 ? "..." : "")
99
+
100
+ f = Prospector::FragmentSpectrum.new(polypeptide.sequence, n, c)
101
+
102
+ headers = {
103
+ :title => polypeptide.sequence,
104
+ :charge => charge,
105
+ :pepmass => (n.mass + polypeptide.mass + c.mass + charge * (Molecule['H'].mass - Particle['Electron'].mass))/charge
106
+ }
107
+
108
+ data = series.collect {|s| f.series(s)}.flatten.delete_if {|mass| mass < 0 }.sort
109
+ data = [data, Array.new(data.length, 1)].transpose
110
+
111
+ Mascot::Formats::Mgf::Entry.new(headers, data)
112
+ end
113
+ end
114
+
115
+ #
116
+ print = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)
117
+
118
+ # workflow
119
+ digest.enq(*ARGV)
120
+ ARGV.clear
121
+
122
+ app.sequence(digest, fragment, print)
123
+ app.run
@@ -0,0 +1,90 @@
1
+ # = Usage
2
+ # tap reformat_mgf {options} MGF_FILES
3
+ #
4
+ # = Description
5
+ # Reformats mgf files to a standard output like:
6
+ #
7
+ # BEGIN IONS
8
+ # TITLE=7100401blank.190.190.2.dta
9
+ # CHARGE=2+
10
+ # PEPMASS=321.571138
11
+ # 100.266 2.0
12
+ # 111.323 2.5
13
+ # ...
14
+ # 496.110 3.3
15
+ # 601.206 1.3
16
+ # END IONS
17
+ #
18
+ # = Information
19
+ #
20
+ # Copyright (c) 2006-2007, Regents of the University of Colorado.
21
+ # Developer:: Simon Chiang, Biomolecular Structure Program
22
+ # Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
23
+ # Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
24
+ #
25
+ require 'tap/script'
26
+
27
+ app = Tap::App.instance
28
+
29
+ #
30
+ # handle options
31
+ #
32
+
33
+ opts = [
34
+ ['--target_dir', '-t', GetoptLong::REQUIRED_ARGUMENT, "Specify an output directory."],
35
+ ['--mz_precision', '-m', GetoptLong::REQUIRED_ARGUMENT, "Specify the mz precision."],
36
+ ['--intensity_precision', '-i', GetoptLong::REQUIRED_ARGUMENT, "Specify the intensity precision."],
37
+ ['--pepmass_precision', '-p', GetoptLong::REQUIRED_ARGUMENT, "Specify the peptide mass precision."],
38
+ ['--headers', nil, GetoptLong::REQUIRED_ARGUMENT, "Specify the headers to include, separated by commas."],
39
+ ['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
40
+ ['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifies debug mode."]]
41
+
42
+ config = {:target_dir => 'reformatted'}
43
+
44
+ Tap::Script.handle_options(*opts) do |opt, value|
45
+ case opt
46
+ when '--help'
47
+ puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
48
+ puts
49
+ puts Tap::Script.usage_options(opts)
50
+ exit
51
+
52
+ when '--debug'
53
+ app.options.debug = true
54
+
55
+ when '--headers'
56
+ value = value[1..-2] if value[0] == 34 && value[-1] == 34
57
+ config[:headers] = value.split(/,/).collect {|header| header.strip}
58
+ else
59
+ opt =~ /--(.*)/
60
+ config[$1.to_sym] = value
61
+
62
+ end
63
+ end
64
+
65
+ #
66
+ # add your script code here
67
+ #
68
+
69
+ require 'mascot/formats/mgf'
70
+
71
+ reformat = Tap::FileTask.new("", config) do |task, input|
72
+ target = task.filepath(task.config[:target_dir], File.basename(input))
73
+ task.prepare(target)
74
+
75
+ task.log_basename :reformatting, input
76
+ Mascot::Formats::Mgf::Archive.open(input) do |archive|
77
+ archive.reindex if archive.length == 0
78
+
79
+ File.open(target, "wb") do |output|
80
+ archive.each do |mgf|
81
+ mgf.puts(output, task.config)
82
+ output.puts
83
+ end
84
+ end
85
+ end
86
+ end
87
+
88
+ args = ARGV.dup
89
+ ARGV.clear
90
+ app.run(reformat, *args)
@@ -0,0 +1,5 @@
1
+ module Ms # root namespace used by every library file in this gem
2
+ module Mascot # namespace for the Mascot-specific classes and constants
3
+ FRAGMENT_TEST_MASS_UNCERTAINTY = 10**-2 # one hundredth; NOTE(review): presumably the mass tolerance allowed when fragment tests compare spectra -- confirm against the test suite
4
+ end
5
+ end
@@ -0,0 +1,20 @@
1
+ require 'tap/http/dispatch'
2
+
3
+ module Ms
4
+ module Mascot
5
+ # :startdoc::manifest exports results from a search
6
+ # UNDER CONSTRUCTION
7
+ class Export < Tap::Http::Dispatch
8
+
9
+ def process(*mascot_files)
10
+ # generate request hashes for the mgf files using the
11
+ # configured parameters
12
+ requests = mascot_files.collect do |mascot_file|
13
+ {:params => params.merge("file" => mascot_file)}
14
+ end
15
+
16
+ super(*requests)
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,39 @@
1
+ require 'ms/in_silico/fragment'
2
+ require 'ms/mascot/spectrum'
3
+
4
+ module Ms
5
+ module Mascot
6
+
7
+ # Ms::Mascot::Fragment::manifest calculates a theoretical Mascot ms/ms spectrum
8
+ #
9
+ # Calculates the parent ion mass and theoretical ms/ms spectrum for a peptide
10
+ # sequence. Configurations allow the specification of one or more
11
+ # fragmentation series to include, as well as charge, and intensity.
12
+ #
13
+ # % rap fragment TVQQEL --+ dump --no-audit
14
+ # # date: 2008-09-15 14:37:55
15
+ # ---
16
+ # ms/mascot/fragment (:...:):
17
+ # - - 717.3777467
18
+ # - - 102.054955
19
+ # - 132.1019047
20
+ # - 201.123369
21
+ # - 261.1444977
22
+ # - 329.181947
23
+ # - 389.2030757
24
+ # - 457.240525
25
+ # - 517.2616537
26
+ # - 586.283118
27
+ # - 616.3300677
28
+ #
29
+ # In the output, the parent ion mass is given first, followed by an array of
30
+ # the sorted fragmentation data.
31
+ class Fragment < InSilico::Fragment
32
+
33
+ def spectrum(peptide) # returns the Mascot-specific spectrum object for peptide
34
+ Mascot::Spectrum.new(peptide, nterm, cterm) # nterm/cterm are not defined in this file; NOTE(review): presumably configurations inherited from InSilico::Fragment -- confirm
35
+ end
36
+
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,2 @@
1
+ require 'mascot/formats/mgf/entry'
2
+ require 'mascot/formats/mgf/archive'
@@ -0,0 +1,24 @@
1
+ require 'external'
2
+ require 'ms/mascot/mgf/entry'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Mgf
7
+
8
+ # Provides array-like access to an mgf archival file.
9
+ class Archive < ExternalArchive
10
+
11
+ # Reindexes self to each mgf entry in io, using the "BEGIN IONS" line as the entry separator; the optional block is forwarded to reindex_by_sep
12
+ def reindex(&block)
13
+ reindex_by_sep("BEGIN IONS", :entry_follows_sep => true, &block) # NOTE(review): the option name suggests each entry starts at its separator line -- confirm against ExternalArchive
14
+ end
15
+
16
+ # Returns an Mgf::Entry initialized using str; str is handed to Entry.parse, which raises ArgumentError unless it begins with 'BEGIN IONS' and ends with 'END IONS'
17
+ def str_to_entry(str)
18
+ Entry.parse(str)
19
+ end
20
+
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,162 @@
1
+ module Ms
2
+ module Mascot
3
+ module Mgf
4
+
5
+ # Represents a mascot generic file (mgf) formatted entry.
6
+ #
7
+ # BEGIN IONS
8
+ # TITLE=7100401blank.190.190.2.dta
9
+ # CHARGE=2+
10
+ # PEPMASS=321.571138
11
+ # 100.266 2.0
12
+ # 111.323 2.5
13
+ # ...
14
+ # 496.110 3.3
15
+ # 601.206 1.3
16
+ # END IONS
17
+ #
18
+ class Entry
19
+ class << self
20
+
21
+ # Parses the entry string into an Mgf::Entry. The entry must be
22
+ # complete and properly formatted, ie it must begin with a
23
+ # 'BEGIN IONS' line and end with an 'END IONS' line.
24
+ def parse(str)
25
+ entry = Entry.new
26
+
27
+ lines = str.strip.split(/\s*\r?\n\s*/)
28
+
29
+ unless lines.shift == "BEGIN IONS"
30
+ raise ArgumentError, "input should begin with 'BEGIN IONS'"
31
+ end
32
+
33
+ unless lines.pop == "END IONS"
34
+ raise ArgumentError, "input should end with 'END IONS'"
35
+ end
36
+
37
+ lines.each do |line|
38
+ if line =~ /^(.*?)=(.*)$/
39
+ entry[$1] = $2
40
+ else
41
+ entry.data << line.split(/\s+/, 2).collect {|i| i.to_f }
42
+ end
43
+ end
44
+
45
+ entry
46
+ end
47
+ end
48
+
49
+ # A hash of mgf headers, not including CHARGE and PEPMASS
50
+ attr_reader :headers
51
+
52
+ # The charge of the entry
53
+ attr_accessor :charge
54
+
55
+ # The peptide mass of the entry
56
+ attr_accessor :pepmass
57
+
58
+ # The data (mz/intensity) for the entry
59
+ attr_accessor :data
60
+
61
+ # Initializes a new Entry using the headers and data. Every header is
62
+ # assigned through []=, so CHARGE and PEPMASS set charge and pepmass.
63
+ def initialize(headers={}, data=[])
64
+ @headers = {}
65
+ @pepmass = nil
66
+ @charge = nil
67
+ @data = data # stored as given (no copy is made)
68
+
69
+ headers.each_pair do |key, value|
70
+ self[key] = value # []= upcases the key and special-cases CHARGE and PEPMASS
71
+ end
72
+ end
73
+
74
+ # Retrieve a header using an mgf header string. CHARGE and PEPMASS
75
+ # headers can be retrieved using [], and will reflect the current
76
+ # values of charge and pepmass. Keys are stringified and upcased.
77
+ def [](key)
78
+ key = key.to_s.upcase
79
+ case key
80
+ when "PEPMASS" then pepmass.to_s
81
+ when "CHARGE" then charge_to_s
82
+ else headers[key]
83
+ end
84
+ end
85
+
86
+ # Set a header using an mgf header string. CHARGE and PEPMASS headers
87
+ # may be set using using []=, and will modify the current values of
88
+ # charge and pepmass. Keys are stringified and upcased.
89
+ def []=(key, value)
90
+ key = key.to_s.upcase
91
+ case key
92
+ when "PEPMASS"
93
+ self.pepmass = value.to_f
94
+ when "CHARGE"
95
+ value = case value
96
+ when Fixnum then value
97
+ when /^(\d+)([+-])$/ then $1.to_i * ($2 == "+" ? 1 : -1)
98
+ else raise "charge should be an number, or a string formatted like '1+' or '1-'"
99
+ end
100
+
101
+ self.charge = value
102
+ else
103
+ headers[key] = value
104
+ end
105
+ end
106
+
107
+ # Formats and puts self to the target. Use the options to modify the
108
+ # output:
109
+ #
110
+ # headers:: an array of headers to include (by default all headers
111
+ # will be included; pepmass and charge will always be
112
+ # included)
113
+ # pepmass_precision:: integer value specifying precision of pepmass
114
+ # mz_precision:: integer value specifying precision of mz values
115
+ # intensity_precision:: integer value specifying precision of intensity
116
+ # values
117
+ def dump(target="", options={})
118
+ options = {
119
+ :mz_precision => nil,
120
+ :intensity_precision => nil,
121
+ :pepmass_precision => nil,
122
+ :headers => nil
123
+ }.merge(options)
124
+
125
+ target << "BEGIN IONS\n"
126
+ (options[:headers] || headers.keys).each do |key|
127
+ target << "#{key.upcase}=#{headers[key]}\n"
128
+ end
129
+
130
+ target << "CHARGE=#{charge_to_s}\n"
131
+ target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
132
+
133
+ data_format = "#{format options[:mz_precision]} #{format options[:intensity_precision]}\n"
134
+ data.each do |data_point|
135
+ target << (data_format % data_point)
136
+ end
137
+
138
+ target << "END IONS\n"
139
+ target
140
+ end
141
+
142
+ # Returns self formatted as an mgf entry string, ie the result of dump called with its default target and options
143
+ def to_s
144
+ dump
145
+ end
146
+
147
+ private
148
+
149
+ # formats the charge as a string like '2+' or '1-'; a nil charge gives an empty string. NOTE(review): a charge of 0 is rendered as '0-' -- confirm that is intended
150
+ def charge_to_s # :nodoc:
151
+ charge == nil ? "" : "#{charge.abs}#{charge > 0 ? '+' : '-'}"
152
+ end
153
+
154
+ # returns a format string for the specified precision: "%s" when precision is nil, otherwise a fixed-point format such as "%.3f". Note that inside Entry this private method takes the place of Kernel#format
155
+ def format(precision) # :nodoc:
156
+ precision == nil ? "%s" : "%.#{precision}f"
157
+ end
158
+
159
+ end
160
+ end
161
+ end
162
+ end
@@ -0,0 +1,94 @@
1
+ require 'ms/in_silico/digest'
2
+ require 'ms/mascot/fragment'
3
+ require 'ms/mascot/mgf/archive'
4
+
5
+ module Ms
6
+ module Mascot
7
+
8
+ # Ms::Mascot::Predict::manifest predicts the spectra for a protein sequence
9
+ #
10
+ # Fragments a protein sequence and calculates the fragment spectra for
11
+ # each peptide. The peptide spectra are formatted as mgf and dumped to
12
+ # the target.
13
+ #
14
+ # % rap predict MAEELVLERCDLELETNGRDHHTADLCREKLVVRRGQPFWLTLHFEGRNYEASVDSLTFS
15
+ # I[16:30:19] digest MAEELVLERCD... to 15 peptides
16
+ # I[16:30:19] fragment MAEELVLER
17
+ # I[16:30:19] fragment MAEELVLERCDLELETNGR
18
+ # I[16:30:19] fragment CDLELETNGR
19
+ # I[16:30:19] fragment CDLELETNGRDHHTADLCR
20
+ # I[16:30:19] fragment DHHTADLCR
21
+ # I[16:30:19] fragment DHHTADLCREK
22
+ # I[16:30:19] fragment EKLVVR
23
+ # I[16:30:19] fragment LVVR
24
+ # I[16:30:19] fragment LVVRR
25
+ # I[16:30:19] fragment RGQPFWLTLHFEGR
26
+ # I[16:30:19] fragment GQPFWLTLHFEGR
27
+ # I[16:30:19] fragment GQPFWLTLHFEGRNYEASVDSLTFS
28
+ # I[16:30:19] fragment NYEASVDSLTFS
29
+ #
30
+ class Predict < Tap::FileTask
31
+ define :digest, InSilico::Digest, {:max_misses => 1}
32
+ define :fragment, Mascot::Fragment, {:intensity => 1, :unmask => true, :sort => true}
33
+
34
+ config :headers, nil, &c.hash_or_nil # a hash of headers to include
35
+ config :min_length, 3, &c.integer_or_nil # the minimum peptide length
36
+ config :mz_precision, 6, &c.integer # the precision of mzs
37
+ config :intensity_precision, 0, &c.integer # the precision of intensities
38
+ config :pepmass_precision, 6, &c.integer # the precision of peptide mass
39
+
40
+ # Sequences digest and fragment: each digest result is passed on to
41
+ # fragment, and each completed fragment appends an mgf entry to @entries.
42
+ def workflow
43
+ digest.on_complete do |_results|
44
+ _results._iterate.each do |_result|
45
+ next if min_length && _result._current.length < min_length # skip results shorter than min_length (no limit when min_length is nil)
46
+ fragment._execute(_result)
47
+ end
48
+ end
49
+
50
+ fragment.on_complete do |_result|
51
+ parent_ion_mass, data = _result._current
52
+ next if data.empty? # nothing is recorded for an empty spectrum
53
+
54
+ peptide = _result._values[-2] # NOTE(review): presumably the value preceding the current one in the audit trail, ie the fragmented peptide -- confirm
55
+ headers = {
56
+ 'TITLE' => "#{peptide} (#{fragment.series.join(', ')})",
57
+ 'CHARGE' => fragment.charge,
58
+ 'PEPMASS' => parent_ion_mass}
59
+
60
+ @entries << Mgf::Entry.new(headers, data)
61
+ end
62
+ end
63
+
64
+ # Infers a default path for the output mgf file from the sequence; the
65
+ # path is the sequence if the sequence is less than 10 characters,
66
+ # otherwise it's like: "<first five>_<last five>.mgf"
67
+ #
68
+ def default_path(sequence)
69
+ sequence = "#{sequence[0,5]}_#{sequence[-5,5]}" if sequence.length > 10
70
+ "#{sequence}.mgf"
71
+ end
72
+
73
+ def process(sequence, target=nil)
74
+ sequence = sequence.gsub(/\s/, "")
75
+
76
+ @entries = []
77
+ digest.execute(sequence)
78
+
79
+ # prepare and dump the predicted spectra
80
+ # to the target path.
81
+ target = default_path(sequence) if target == nil
82
+ prepare(target)
83
+ File.open(target, "wb") do |file|
84
+ @entries.each do |entry|
85
+ entry.dump(file, config)
86
+ file.puts
87
+ end
88
+ end
89
+
90
+ target
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,188 @@
1
+ require 'ms/in_silico/spectrum'
2
+
3
+ module Ms
4
+ module Mascot
5
+
6
+ # Generates a Mascot-style theoretical spectrum. When the masses are
7
+ # set correctly, the theoretical spectrum will have zero error and
8
+ # full coverage (for whatever series are generated) when identified
9
+ # using Mascot.
10
+ #
11
+ # === Peptide Mass Error
12
+ #
13
+ # The peptide mass calculated by Spectrum is inexact with respect to Mascot.
14
+ # Mascot uses some unknown algorithm to speed up its calculation
15
+ # and introduces some rounding/truncation error somewhere along
16
+ # the line. For instance, if you calculate the mass of a peptide
17
+ # by directly using the Unimod masses, it will NOT be the mass used
18
+ # by Mascot. For example:
19
+ #
20
+ # def molecule_mass(c, h, n, o, s)
21
+ # c * 12 + h * 1.007825035 + n * 14.003074 + o * 15.99491463 + s * 31.9720707
22
+ # end
23
+ #
24
+ # # Formula for MFSFVDLR: C(47)H(69)N(11)O(11)S(1)
25
+ # # Formula for water: C(0) H(2) N(0) O(1) S(0)
26
+ #
27
+ # molecule_mass(47, 69, 11, 11, 1) + molecule_mass(0, 2, 0, 1, 0)
28
+ # # => 1013.500437745
29
+ #
30
+ # Now by comparison:
31
+ #
32
+ # mascot: 1013.500443
33
+ # unimod: 1013.500437745
34
+ # delta: 0.000005255
35
+ #
36
+ # Similar or worse errors are typical and cannot be eliminated by
37
+ # any known permutation (calculating from the residue masses,
38
+ # rounding etc). See http://gist.github.com/31241 for tasks that
39
+ # perform the calculation using various permutations.
40
+ #
41
+ # One helpful note if you try to break the code: you can set the
42
+ # number of sig figs to 6 in mascot.dat (MassDecimalPlaces) and
43
+ # read the exact peptide mass numbers directly from a result page.
44
+ #
45
+ # Spectrum calculates peptide mass using the masses in mass_map,
46
+ # ie the rounded residue masses.
47
+ class Spectrum < InSilico::Spectrum
48
+ Element = Constants::Libraries::Element
49
+
50
+ # A map of the default [monoisotopic, average] masses for a variety
51
+ # of constants used by Mascot. The element masses can be traced
52
+ # back to {Unimod}[http://www.unimod.org/masses.html] and the
53
+ # residues calculated by using the Unimod masses, then rounding.
54
+ #
55
+ #--
56
+ # Taken from the configuration pages on the Hansen Lab server:
57
+ #
58
+ # - http://hsc-mascot.uchsc.edu/mascot/x-cgi/ms-config.exe?u=1222975681&ELEMENTS_SHOW=1
59
+ # - http://hsc-mascot.uchsc.edu/mascot/x-cgi/ms-config.exe?u=1222975681&AMINOACIDS_SHOW=1
60
+ #
61
+ DEFAULT_MASS_MAP = {}
62
+ DEFAULT_MASS_MAP.merge!(
63
+ Residue::A => [71.037114, 71.0779],
64
+ Residue::R => [156.101111, 156.1857],
65
+ Residue::N => [114.042927, 114.1026],
66
+ Residue::D => [115.026943, 115.0874],
67
+ Residue::C => [103.009185, 103.1429],
68
+ Residue::E => [129.042593, 129.1140],
69
+ Residue::Q => [128.058578, 128.1292],
70
+ Residue::G => [57.021464, 57.0513],
71
+ Residue::H => [137.058912, 137.1393],
72
+ Residue::I => [113.084064, 113.1576],
73
+ Residue::L => [113.084064, 113.1576],
74
+ Residue::K => [128.094963, 128.1723],
75
+ Residue::M => [131.040485, 131.1961],
76
+ Residue::F => [147.068414, 147.1739],
77
+ Residue::P => [97.052764, 97.1152],
78
+ Residue::S => [87.032028, 87.0773],
79
+ Residue::T => [101.047679, 101.1039],
80
+ Residue::W => [186.079313, 186.2099],
81
+ Residue::Y => [163.063329, 163.1733],
82
+ Residue::V => [99.068414, 99.1311],
83
+
84
+ Element::Ag => [106.905092, 107.8682],
85
+ Element::Au => [196.966543, 196.96655],
86
+ Element::Br => [78.9183361, 79.904],
87
+ Element::C => [12, 12.0107],
88
+ Element::Ca => [39.9625906, 40.078],
89
+ Element::Cl => [34.96885272, 35.453],
90
+ Element::Cu => [62.9295989, 63.546],
91
+ Element::F => [18.99840322, 18.9984032],
92
+ Element::Fe => [55.9349393, 55.845],
93
+ Element::H => [1.007825035, 1.00794],
94
+ Element::Hg => [201.970617, 200.59],
95
+ Element::I => [126.904473, 126.90447],
96
+ Element::K => [38.9637074, 39.0983],
97
+ Element::Li => [7.016003, 6.941],
98
+ Element::Mo => [97.9054073, 95.94],
99
+ Element::N => [14.003074, 14.0067],
100
+ Element::Na => [22.9897677, 22.98977],
101
+ Element::Ni => [57.9353462, 58.6934],
102
+ Element::O => [15.99491463, 15.9994],
103
+ Element::P => [30.973762, 30.973761],
104
+ Element::S => [31.9720707, 32.065],
105
+ Element::Se => [79.9165196, 78.96],
106
+ Element::Zn => [63.9291448, 65.409],
107
+
108
+ #'13C' => [13.00335483, 13.00335483],
109
+ #'15N' => [15.00010897, 15.00010897],
110
+ #'18O' => [17.9991603, 17.9991603],
111
+ #'2H' => [2.014101779, 2.014101779],
112
+
113
+ HYDROGEN => [1.007825, 1.0079],
114
+ HYDROXIDE => [17.002740, 17.0073],
115
+ ELECTRON => [0.000549, 0.000549]
116
+ )
117
+
118
+ # Mark prolines to be located by the spectrum,
119
+ # so they may be masked later.
120
+ locate_residues "P"
121
+
122
+ # A hash of masses to use in place of the Element/Molecule
123
+ # masses normally used in calculating a spectrum. By
124
+ # default mass map contains the monoisotopic masses
125
+ # specified in DEFAULT_MASS_MAP.
126
+ #
127
+ # Note: to generate a zero-error spectrum for Mascot, it
128
+ # is important that mass_map contains the exact masses
129
+ # used by the server. If your server uses non-default
130
+ # masses, override the values in DEFAULT_MASS_MAP to
131
+ # affect all instances, or just mass_map to affect a
132
+ # single instance. See:
133
+ #
134
+ # http://your.mascot.server/x-cgi/ms-config.exe
135
+ #
136
+ # To check the mass values for your server.
137
+ attr_reader :mass_map
138
+
139
+ def initialize(sequence, nterm=HYDROGEN, cterm=HYDROXIDE) # the termini default to the HYDROGEN and HYDROXIDE constants
140
+ @mass_map = {}
141
+ DEFAULT_MASS_MAP.each_pair do |const, (mono, avg)|
142
+ @mass_map[const] = mono # only the monoisotopic mass is kept; avg is unused
143
+ end
144
+
145
+ super do |element| # NOTE(review): presumably the superclass uses this block to look up masses, here answered from @mass_map -- confirm
146
+ @mass_map[element]
147
+ end
148
+
149
+ [:a, :b, :c, :cladder].each {|key| mask_locations key, [-1] } # NOTE(review): presumably hides the last position of these series -- confirm against InSilico::Spectrum
150
+ [:x, :y, :Y, :z, :nladder].each {|key| mask_locations key, [0] } # NOTE(review): presumably hides the first position of these series -- confirm
151
+
152
+ # # mask prolines (currently disabled)
153
+ # mask_locations :c, residue_locations['P'].collect {|i| i-1}
154
+ # mask_locations :z, residue_locations['P']
155
+ end
156
+
157
+ protected
158
+
159
+ # looks up a mapped mass from mass_map; when mass_map has no entry for the molecule, reverts to super
160
+ def mass(molecule) # :nodoc:
161
+ @mass_map[molecule] || super
162
+ end
163
+
164
+ # handle_unknown_series maps several Mascot-specific
165
+ # series annotations to their standard counterparts:
166
+ #
167
+ # series+n series + Hn
168
+ # series* series - NH3
169
+ # series0 series - H2O
170
+ # Immon. immonium
171
+ #
172
+ def handle_unknown_series(s)
173
+ case s
174
+ when /^([\w\+\-]+)+(\d+)$/
175
+ self.series("#{$1} +H#{$2.to_i}")
176
+ when /^(\w+)\*(\+*-*)$/
177
+ self.series("#{$1}#{$2} -NH3")
178
+ when /^(\w+)0(\+*-*)$/
179
+ self.series("#{$1}#{$2} -H2O")
180
+ when /^Immon\.(.*)$/
181
+ self.series("immonium#{$1}")
182
+ else
183
+ super
184
+ end
185
+ end
186
+ end
187
+ end
188
+ end
@@ -0,0 +1,41 @@
1
+ require 'tap/http/dispatch'
2
+
3
+ module Ms
4
+ module Mascot
5
+ # :startdoc::manifest submits an mgf file
6
+ # UNDER CONSTRUCTION
7
+ class Submit < Tap::Http::Dispatch
8
+ RESULT_REGEXP = /<A HREF="..\/cgi\/master_results.pl\?file=(.*?\.dat)">/im
9
+ ERROR_REGEXP = /<BR>The following error has occured getting your search details:<BR>(.*?)<BR>/im
10
+ MISTAKE_REGEXP = /<BR>Sorry, your search could not be performed due to the following mistake entering data.<BR>(.*?)<BR>/im
11
+
12
+ def process(*mgf_files)
13
+ # generate request hashes for the mgf files using the
14
+ # configured parameters
15
+ requests = mgf_files.collect do |mgf_file|
16
+ file = {'Content-Type' => 'application/octet-stream', 'Filename' => mgf_file}
17
+ {:params => params.merge("FILE" => file)}
18
+ end
19
+
20
+ super(*requests)
21
+ end
22
+
23
+ # Hook for processing a response. When the body links to a results file, logs the response message
24
+ # with the captured .dat file name and returns that name; otherwise raises ResponseError.
25
+ def process_response(res)
26
+ case res.body
27
+ when RESULT_REGEXP
28
+ log(res.message, $1)
29
+ $1
30
+
31
+ when ERROR_REGEXP # the body reports an error getting the search details
32
+ raise ResponseError, "error: #{$1.strip}"
33
+ when MISTAKE_REGEXP # the body reports a mistake in the entered data
34
+ raise ResponseError, "mistake: #{$1.strip}"
35
+ else
36
+ raise ResponseError, "unknown error:\n#{res.body}" # unrecognized body: include it whole in the message
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-mascot
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-03 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.11"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tap-http
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.0
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: external
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.3.0
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: mspire
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.5.0
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: ms-in_silico
57
+ type: :runtime
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.2.0
64
+ version:
65
+ - !ruby/object:Gem::Dependency
66
+ name: ms-testdata
67
+ type: :development
68
+ version_requirement:
69
+ version_requirements: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 0.0.1
74
+ version:
75
+ description:
76
+ email: simon.a.chiang@gmail.com
77
+ executables: []
78
+
79
+ extensions: []
80
+
81
+ extra_rdoc_files:
82
+ - README
83
+ - MIT-LICENSE
84
+ files:
85
+ - cmd/generate_mgf.rb
86
+ - cmd/generate_prospector_mgf.rb
87
+ - cmd/reformat_mgf.rb
88
+ - lib/ms/mascot.rb
89
+ - lib/ms/mascot/export.rb
90
+ - lib/ms/mascot/fragment.rb
91
+ - lib/ms/mascot/mgf.rb
92
+ - lib/ms/mascot/mgf/archive.rb
93
+ - lib/ms/mascot/mgf/entry.rb
94
+ - lib/ms/mascot/predict.rb
95
+ - lib/ms/mascot/spectrum.rb
96
+ - lib/ms/mascot/submit.rb
97
+ - README
98
+ - MIT-LICENSE
99
+ has_rdoc: true
100
+ homepage: http://mspire.rubyforge.org/projects/ms-mascot/
101
+ post_install_message:
102
+ rdoc_options: []
103
+
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: "0"
111
+ version:
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: "0"
117
+ version:
118
+ requirements: []
119
+
120
+ rubyforge_project: mspire
121
+ rubygems_version: 1.3.1
122
+ signing_key:
123
+ specification_version: 2
124
+ summary: An Mspire library supporting Mascot.
125
+ test_files: []
126
+