ms-mascot 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,84 @@
1
- require 'tap/http/dispatch'
1
+ require 'tap/http/submit'
2
2
 
3
3
  module Ms
4
4
  module Mascot
5
5
  # :startdoc::manifest exports results from a search
6
- # UNDER CONSTRUCTION
7
- class Export < Tap::Http::Dispatch
6
+ class Export < Tap::Http::Submit
8
7
 
9
- def process(*mascot_files)
10
- # generate request hashes for the mgf files using the
11
- # configured parameters
12
- requests = mascot_files.collect do |mascot_file|
13
- {:params => params.merge("file" => mascot_file)}
14
- end
8
+ # The MatrixScience public search site
9
+ DEFAULT_URI = "http://www.matrixscience.com/cgi/export_dat_2.pl"
10
+
11
+ # Parameters for a typical export
12
+ DEFAULT_PARAMS = {
13
+ "pep_expect"=>"1",
14
+ "prot_mass"=>"1",
15
+ "protein_master"=>"1",
16
+ "_server_mudpit_switch"=>"0.000000001",
17
+ "pep_exp_mz"=>"1",
18
+ "do_export"=>"1",
19
+ "pep_delta"=>"1",
20
+ "export_format"=>"XML",
21
+ "prot_acc"=>"1",
22
+ "pep_score"=>"1",
23
+ "show_format"=>"1",
24
+ "_showsubsets"=>"0",
25
+ "_show_decoy_report"=>"",
26
+ "pep_scan_title"=>"1",
27
+ "pep_miss"=>"1",
28
+ "pep_calc_mr"=>"1",
29
+ "pep_exp_mr"=>"1",
30
+ "prot_score"=>"1",
31
+ "pep_query"=>"1",
32
+ "peptide_master"=>"1",
33
+ "prot_matches"=>"1",
34
+ "_onlyerrortolerant"=>"",
35
+ "_showallfromerrortolerant"=>"",
36
+ "prot_hit_num"=>"1",
37
+ "search_master"=>"1",
38
+ "_sigthreshold"=>"0.05",
39
+ "show_params"=>"1",
40
+ "show_mods"=>"1",
41
+ "show_header"=>"1",
42
+ "pep_isbold"=>"1",
43
+ "pep_seq"=>"1",
44
+ "pep_exp_z"=>"1",
45
+ "prot_desc"=>"1",
46
+ "_ignoreionsscorebelow"=>"0",
47
+ "REPORT"=>"AUTO",
48
+ "pep_rank"=>"1",
49
+ "pep_var_mod"=>"1",
50
+ "_noerrortolerant"=>""
51
+ }
15
52
 
16
- super(*requests)
53
+ # Typical headers for an export
54
+ DEFAULT_HEADERS = {
55
+ "Keep-Alive"=>"300",
56
+ "Accept-Encoding"=>"gzip,deflate",
57
+ "Accept-Language"=>"en-us,en;q=0.5",
58
+ "Content-Type"=> "multipart/form-data; boundary=---------------------------168072824752491622650073",
59
+ "Accept-Charset"=>"ISO-8859-1,utf-8;q=0.7,*;q=0.7",
60
+ "Accept"=>"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
61
+ "Connection"=>"keep-alive"
62
+ }
63
+
64
+ config :uri, DEFAULT_URI # The uri of the mascot search site
65
+ config :headers, DEFAULT_HEADERS, &c.hash # a hash of request headers
66
+ config :params, DEFAULT_PARAMS, &c.hash # a hash of query parameters
67
+ config :request_method, 'GET' # the request method (get or post)
68
+ config :version, 1.1 # the HTTP version
69
+ config :redirection_limit, nil, &c.integer_or_nil # the redirection limit for the request
70
+
71
+ def process(result_filepath)
72
+ # duplicate the configurations
73
+ request = {}
74
+ config.each_pair do |key, value|
75
+ request[key] = value.kind_of?(Hash) ? value.dup : value
76
+ end
77
+
78
+ # set filename for export
79
+ request[:params]['file'] = result_filepath
80
+
81
+ super(request)
17
82
  end
18
83
  end
19
84
  end
@@ -0,0 +1,54 @@
1
+ require 'ms/mascot/mgf/entry'
2
+
3
+ module Ms
4
+ module Mascot
5
+ # :startdoc::manifest formats a fragment spectrum as mgf
6
+ #
7
+ # Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
8
+ # configurations specify various details of the dump, including the
9
+ # precision and default headers.
10
+ #
11
+ # % rap fragment TVQQEL --:s format_mgf
12
+ #
13
+ # (note: be sure to use the splat option on the join)
14
+ #
15
+ class FormatMgf < Tap::Task
16
+
17
+ config :default_headers, {}, &c.hash # a hash of default headers
18
+ config :min_length, 3, &c.integer_or_nil # the minimum peptide length
19
+ config :mz_precision, 6, &c.integer # the precision of mzs
20
+ config :intensity_precision, 0, &c.integer # the precision of intensities
21
+ config :pepmass_precision, 6, &c.integer # the precision of peptide mass
22
+
23
+ config :prefix, nil, &c.string_or_nil # an optional prefix
24
+ config :suffix, "\n", &c.string_or_nil # an optional suffix
25
+
26
+ # Maps header keys (typically output by a fragment task)
27
+ # to Mgf::Entry header strings.
28
+ HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
29
+
30
+ def process(data, headers)
31
+ lines = []
32
+ lines << prefix if prefix
33
+
34
+ mgf_headers = format_headers(headers)
35
+ Ms::Mascot::Mgf::Entry.new(mgf_headers, data).dump(lines, config)
36
+
37
+ lines << suffix if suffix
38
+ lines.join("")
39
+ end
40
+
41
+ protected
42
+
43
+ # helper to format the headers properly for an mgf entry
44
+ def format_headers(headers) # :nodoc:
45
+ mgf_headers = {}
46
+ default_headers.merge(headers).each_pair do |key, value|
47
+ key = HEADER_MAP[key] || key.to_s.upcase
48
+ mgf_headers[key] = value
49
+ end
50
+ mgf_headers
51
+ end
52
+ end
53
+ end
54
+ end
@@ -4,36 +4,40 @@ require 'ms/mascot/spectrum'
4
4
  module Ms
5
5
  module Mascot
6
6
 
7
- # Ms::Mascot::Fragment::manifest calculates a theoretical Mascot ms/ms spectrum
7
+ # :startdoc::manifest calculates a theoretical Mascot ms/ms spectrum
8
8
  #
9
- # Calculates the parent ion mass and theoretical ms/ms spectrum for a peptide
10
- # sequence. Configurations allow the specification of one or more
11
- # fragmentation series to include, as well as charge, and intensity.
12
- #
13
- # % rap fragment TVQQEL --+ dump --no-audit
14
- # # date: 2008-09-15 14:37:55
15
- # ---
16
- # ms/mascot/fragment (:...:):
17
- # - - 717.3777467
18
- # - - 102.054955
19
- # - 132.1019047
20
- # - 201.123369
21
- # - 261.1444977
22
- # - 329.181947
23
- # - 389.2030757
24
- # - 457.240525
25
- # - 517.2616537
26
- # - 586.283118
27
- # - 616.3300677
28
- #
29
- # In the output, the parent ion mass is given first, followed by an array of
30
- # the sorted fragmentation data.
9
+ # Calculates the theoretical Mascot ms/ms spectrum for a peptide sequence.
10
+ # A Mascot spectrum differs from the standard in-silico spectrum only in
11
+ # the masses that get used. By default Mascot::Fragment uses masses with
12
+ # 6 significant digits, the same masses that Mascot uses by default, and
13
+ # generates spectra with an intensity of 1.
14
+ #
15
+ # In addition, Mascot::Fragment supports several alternative series notations.
16
+ #
17
+ # Notation Translation Example
18
+ # series+<n> series + Hn a++, y0++
19
+ # series* series - NH3 b*
20
+ # series0 series - H2O y0
21
+ # Immon. immonium Immon.
22
+ #
23
+ # See Ms::Mascot::Spectrum for more details.
31
24
  class Fragment < InSilico::Fragment
32
-
25
+
26
+ config :intensity, 1, &c.num_or_nil # a uniform intensity value
27
+
28
+ # Generates some MGF-specific headers.
29
+ def headers(spec)
30
+ {
31
+ :charge => charge,
32
+ :parent_ion_mass => spec.parent_ion_mass(charge),
33
+ :title => "#{spec.sequence} (#{series.join(', ')})"
34
+ }
35
+ end
36
+
37
+ # Returns a Mascot::Spectrum for the peptide.
33
38
  def spectrum(peptide)
34
39
  Mascot::Spectrum.new(peptide, nterm, cterm)
35
40
  end
36
-
37
41
  end
38
42
  end
39
43
  end
data/lib/ms/mascot/mgf.rb CHANGED
@@ -1,2 +1,35 @@
1
- require 'mascot/formats/mgf/entry'
2
- require 'mascot/formats/mgf/archive'
1
+ require 'ms/mascot/mgf/entry'
2
+ require 'ms/mascot/mgf/archive'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Mgf
7
+ class << self
8
+ # Opens the file and yields an array of entries (well, the array is
9
+ # actually an Ms::Mascot::Mgf::Archive object that acts like an array
10
+ # but leaves data on disk until needed)
11
+ #
12
+ # Ms::Mascot::Mgf.open("file.mgf") do |ar|
13
+ # entry5 = ar[4] # -> 5th entry
14
+ # entry5.pepmass # -> peptide mass
15
+ # entry5.data # -> array of arrays
16
+ # end
17
+ def open(file, &block)
18
+ File.open(file) do |io|
19
+ a = Archive.new(io)
20
+ a.reindex
21
+ block.call(a)
22
+ a.close
23
+ end
24
+ end
25
+
26
+ # returns each entry in the mgf file, like IO.foreach
27
+ def foreach(file, &block)
28
+ open(file) do |ar|
29
+ ar.each &block
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -54,7 +54,17 @@ module Ms
54
54
 
55
55
  # The peptide mass of the entry
56
56
  attr_accessor :pepmass
57
-
57
+
58
+ # returns the title of the entry (or nil if none)
59
+ def title
60
+ @headers['TITLE']
61
+ end
62
+
63
+ # sets the title
64
+ def title=(string)
65
+ @headers['TITLE'] = string
66
+ end
67
+
58
68
  # The data (mz/intensity) for the entry
59
69
  attr_accessor :data
60
70
 
@@ -129,8 +139,16 @@ module Ms
129
139
 
130
140
  target << "CHARGE=#{charge_to_s}\n"
131
141
  target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
132
-
133
- data_format = "#{format options[:mz_precision]} #{format options[:intensity_precision]}\n"
142
+
143
+ entry = data[0]
144
+ data_format = case
145
+ when entry == nil then nil
146
+ when entry.kind_of?(Array) && entry.length == 2
147
+ "#{format options[:mz_precision]} #{format options[:intensity_precision]}\n"
148
+ else
149
+ "#{format options[:mz_precision]}\n"
150
+ end
151
+
134
152
  data.each do |data_point|
135
153
  target << (data_format % data_point)
136
154
  end
@@ -153,10 +171,10 @@ module Ms
153
171
 
154
172
  # returns a format string for the specified precision
155
173
  def format(precision) # :nodoc:
156
- precision == nil ? "%s" : "%.#{precision}f"
174
+ (precision == nil || precision == 0) ? "%s" : "%.#{precision}f"
157
175
  end
158
176
 
159
177
  end
160
178
  end
161
179
  end
162
- end
180
+ end
@@ -44,6 +44,21 @@ module Ms
44
44
  #
45
45
  # Spectrum calculates peptide mass using the masses in mass_map,
46
46
  # ie the rounded residue masses.
47
+ #
48
+ # === Mascot-Specific Series Notation
49
+ #
50
+ # Mascot::Spectrum adds support for a few mascot-specific notations.
51
+ #
52
+ # Notation Translation Example
53
+ # series+<n> series + Hn a++, y0++
54
+ # series* series - NH3 y*
55
+ # series0 series - H2O a0
56
+ # Immon. immonium Immon.
57
+ #
58
+ # It should be noted that Mascot applies unknown masks to these values;
59
+ # when using these types of series, Spectrum will often predict peaks
60
+ # that do not appear in a Mascot search result.
61
+ #
47
62
  class Spectrum < InSilico::Spectrum
48
63
  Element = Constants::Libraries::Element
49
64
 
@@ -171,11 +186,11 @@ module Ms
171
186
  #
172
187
  def handle_unknown_series(s)
173
188
  case s
174
- when /^([\w\+\-]+)+(\d+)$/
189
+ when /^([abcxyYz][\+\-]+)+(\d+)$/
175
190
  self.series("#{$1} +H#{$2.to_i}")
176
- when /^(\w+)\*(\+*-*)$/
191
+ when /^([abcxyYz])\*(\+*-*)$/
177
192
  self.series("#{$1}#{$2} -NH3")
178
- when /^(\w+)0(\+*-*)$/
193
+ when /^([abcxyYz])0(\+*-*)$/
179
194
  self.series("#{$1}#{$2} -H2O")
180
195
  when /^Immon\.(.*)$/
181
196
  self.series("immonium#{$1}")
@@ -1,41 +1,132 @@
1
- require 'tap/http/dispatch'
1
+ require 'tap/http/submit'
2
2
 
3
3
  module Ms
4
4
  module Mascot
5
- # :startdoc::manifest submits an mgf file
6
- # UNDER CONSTRUCTION
7
- class Submit < Tap::Http::Dispatch
8
- RESULT_REGEXP = /<A HREF="..\/cgi\/master_results.pl\?file=(.*?\.dat)">/im
9
- ERROR_REGEXP = /<BR>The following error has occured getting your search details:<BR>(.*?)<BR>/im
10
- MISTAKE_REGEXP = /<BR>Sorry, your search could not be performed due to the following mistake entering data.<BR>(.*?)<BR>/im
11
-
12
- def process(*mgf_files)
13
- # generate request hashes for the mgf files using the
14
- # configured parameters
15
- requests = mgf_files.collect do |mgf_file|
16
- file = {'Content-Type' => 'application/octet-stream', 'Filename' => mgf_file}
17
- {:params => params.merge("FILE" => file)}
5
+ # :startdoc::manifest submits a PMF or MS/MS search to Mascot
6
+ #
7
+ # Submits a search request to Mascot using the mgf file and the search
8
+ # parameters in a static config file. Correctly formatting the search
9
+ # config file is technical since it must contain the correct fields for
10
+ # Submit to recreate a Mascot HTTP search request.
11
+ #
12
+ # The easiest way to capture search parameters in the correct format is
13
+ # to use TapHttp. From the command line, invoke:
14
+ #
15
+ # % tap server
16
+ #
17
+ # Then visit 'http://localhost:8080/capture/tutorial' in a browser and
18
+ # apply the capture procedure to the Mascot search page. Once you have
19
+ # the .yml config file, use this command to submit a search.
20
+ #
21
+ # % rap submit <mgf_file> --config <config_file> --: dump
22
+ #
23
+ # A convenient aspect of this setup is that you can capture parameters
24
+ # once, then re-use them for a number of mgf files.
25
+ #
26
+ # Note that the default Submit configuration uses parameters that are typical
27
+ # for MS/MS searching of a human sample digested with trypsin. These
28
+ # values MUST be overridden and are only provided as a template (for
29
+ # those that want the adventure of manually making a config file).
30
+ #
31
+ class Submit < Tap::Http::Submit
32
+
33
+ # The MatrixScience public search site
34
+ DEFAULT_URI = "http://www.matrixscience.com/cgi/nph-mascot.exe?1"
35
+
36
+ # Parameters for MS/MS searching of a human sample digested with trypsin.
37
+ DEFAULT_PARAMS = {
38
+ "ErrTolRepeat"=>"0",
39
+ "PFA"=>"1",
40
+ "INSTRUMENT"=>"Default",
41
+ "REPTYPE"=>"peptide",
42
+ "COM"=>"Search Title",
43
+ "FORMAT"=>"Mascot generic",
44
+ "PEAK"=>"AUTO",
45
+ "CHARGE"=>"2+",
46
+ "INTERMEDIATE"=>"",
47
+ "SHOWALLMODS"=>"",
48
+ "PRECURSOR"=>"",
49
+ "USERNAME"=>"Name",
50
+ "TOLU"=>"ppm",
51
+ "USEREMAIL"=>"email@email.com",
52
+ "CLE"=>"Trypsin",
53
+ "TOL"=>"100",
54
+ "ITOLU"=>"Da",
55
+ "QUANTITATION"=>"None",
56
+ "SEARCH"=>"MIS",
57
+ "DB"=>"SwissProt",
58
+ "PEP_ISOTOPE_ERROR"=>"0",
59
+ "ITOL"=>"0.6",
60
+ "FORMVER"=>"1.01",
61
+ "IT_MODS"=> [
62
+ "Acetyl (Protein N-term)",
63
+ "Gln->pyro-Glu (N-term Q)",
64
+ "Oxidation (M)"],
65
+ "MASS"=>"Monoisotopic",
66
+ "REPORT"=>"AUTO",
67
+ "TAXONOMY"=>". . . . . . . . . . . . . . . . Homo sapiens (human)"
68
+ }
69
+
70
+ # Typical headers for an MS/MS search.
71
+ DEFAULT_HEADERS = {
72
+ "Keep-Alive"=>"300",
73
+ "Accept-Encoding"=>"gzip,deflate",
74
+ "Accept-Language"=>"en-us,en;q=0.5",
75
+ "Content-Type"=> "multipart/form-data; boundary=---------------------------168072824752491622650073",
76
+ "Accept-Charset"=>"ISO-8859-1,utf-8;q=0.7,*;q=0.7",
77
+ "Accept"=>"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
78
+ "Connection"=>"keep-alive"
79
+ }
80
+
81
+ # Matches a successful search response. After the match:
82
+ #
83
+ # $1:: the result file
84
+ SUCCESS_REGEXP = /<A HREF="\.\.\/cgi\/master_results\.pl\?file=(.*?)">Click here to see Search Report<\/A>/
85
+
86
+ # Matches a failure response. After the match:
87
+ #
88
+ # $1:: the failure message
89
+ FAILURE_REGEXP = /<BR>(.*)/m
90
+
91
+ config :uri, DEFAULT_URI # The uri of the mascot search site
92
+ config :headers, DEFAULT_HEADERS, &c.hash # a hash of request headers
93
+ config :params, DEFAULT_PARAMS, &c.hash # a hash of query parameters
94
+ config :request_method, 'POST' # the request method (get or post)
95
+ config :version, 1.1 # the HTTP version
96
+ config :redirection_limit, nil, &c.integer_or_nil # the redirection limit for the request
97
+
98
+ def process(mgf_file)
99
+
100
+ # duplicate the configurations
101
+ request = {}
102
+ config.each_pair do |key, value|
103
+ request[key] = value.kind_of?(Hash) ? value.dup : value
18
104
  end
19
-
20
- super(*requests)
105
+
106
+ # set filename for upload
107
+ file = request[:params]['FILE'] ||= {}
108
+ file['Filename'] = mgf_file
109
+ file['Content-Type'] = 'application/octet-stream'
110
+ file.delete('Content')
111
+
112
+ # submit request
113
+ parse_response_body super(request)
21
114
  end
22
115
 
23
- # Hook for processing a response. By default process_response
24
- # simply logs the response message and returns the response.
25
- def process_response(res)
26
- case res.body
27
- when RESULT_REGEXP
28
- log(res.message, $1)
116
+ # Processes the response body. Returns the result file if the body
117
+ # indicates a success, or nil if the body indicates a failure.
118
+ def parse_response_body(body)
119
+ case body
120
+ when SUCCESS_REGEXP
121
+ log :success, $1
29
122
  $1
30
-
31
- when ERROR_REGEXP
32
- raise ResponseError, "error: #{$1.strip}"
33
- when MISTAKE_REGEXP
34
- raise ResponseError, "mistake: #{$1.strip}"
123
+ when FAILURE_REGEXP
124
+ log :failure, $1.gsub("<BR>", "\n")
125
+ nil
35
126
  else
36
- raise ResponseError, "unknown error:\n#{res.body}"
127
+ raise "unparseable response: #{body}"
37
128
  end
38
129
  end
39
- end
130
+ end
40
131
  end
41
132
  end