ms-mascot 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.3.0 / 2009-05-25
2
+
3
+ Updated to utilize Tap-0.17.0
4
+
5
+ * reworked format_mgf as a dump task (dump/mgf)
6
+ * added a load/mgf task
7
+ * added xml format for accessing results
8
+
1
9
  == 0.2.2 / 2009-03-31
2
10
 
3
11
  * updates to use latest tap
@@ -0,0 +1,64 @@
1
+ require 'tap/tasks/dump'
2
+ require 'ms/mascot/mgf/entry'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Dump
7
+ # :startdoc::task dumps a fragment spectrum as mgf
8
+ #
9
+ # Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
10
+ # configurations specify various details of the dump, including the
11
+ # precision and default headers.
12
+ #
13
+ # % tap run -- fragment TVQQEL --: dump/mgf
14
+ #
15
+ # :startdoc::task-
16
+ #
17
+ # Examples:
18
+ #
19
+ # # reformat an mgf file (inefficient but works)
20
+ # % rap load/mgf --file OLD_FILE --:i dump/mgf --mz-precision 2 > NEW_FILE
21
+ #
22
+ class Mgf < Tap::Tasks::Dump
23
+
24
+ config :default_headers, {}, &c.hash # A hash of default headers
25
+ config :mz_precision, 6, &c.integer # The precision of mzs
26
+ config :intensity_precision, 0, &c.integer # The precision of intensities
27
+ config :pepmass_precision, 6, &c.integer # The precision of peptide mass
28
+
29
+ config :prefix, nil, &c.string_or_nil # An optional prefix
30
+ config :suffix, "\n", &c.string_or_nil # An optional suffix
31
+
32
+ # Maps common variations of header keys (typically output
33
+ # by a fragment task) to Mgf::Entry header strings.
34
+ HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
35
+
36
+ # Dumps the object to io as an mgf entry.
37
+ def dump(obj, io)
38
+ unless obj.kind_of?(Ms::Mascot::Mgf::Entry)
39
+ data, headers = obj
40
+ mgf_headers = format_headers(headers)
41
+ obj = Ms::Mascot::Mgf::Entry.new(mgf_headers, data)
42
+ end
43
+
44
+ io << prefix if prefix
45
+ obj.dump(io, config)
46
+ io << suffix if suffix
47
+ end
48
+
49
+ protected
50
+
51
+ # helper to format the headers properly for an mgf entry
52
+ def format_headers(headers) # :nodoc:
53
+ headers ||= {}
54
+ mgf_headers = {}
55
+ default_headers.merge(headers).each_pair do |key, value|
56
+ key = HEADER_MAP[key] || key.to_s.upcase
57
+ mgf_headers[key] = value
58
+ end
59
+ mgf_headers
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
3
3
 
4
4
  module Ms
5
5
  module Mascot
6
- # :startdoc::manifest exports results from a search
6
+ # :startdoc::task exports results from a search
7
7
  class Export < Tap::Mechanize::Submit
8
8
  include Validation
9
9
 
@@ -15,7 +15,7 @@ module Ms
15
15
  config "pep_expect", "1", &MASCOT_SWITCH
16
16
  config "prot_mass", "1", &MASCOT_SWITCH
17
17
  config "protein_master", "1", &MASCOT_SWITCH
18
- config "_server_mudpit_switch", 0.000000001, &c.num
18
+ config "_server_mudpit_switch", 0.000000001, &c.numeric
19
19
  config "pep_exp_mz", "1", &MASCOT_SWITCH
20
20
  config "do_export", "1", &MASCOT_SWITCH
21
21
  config "pep_delta", "1", &MASCOT_SWITCH
@@ -37,7 +37,7 @@ module Ms
37
37
  config "_showallfromerrortolerant", ""
38
38
  config "prot_hit_num", "1", &MASCOT_SWITCH
39
39
  config "search_master", "1", &MASCOT_SWITCH
40
- config "_sigthreshold", 0.05, &c.num
40
+ config "_sigthreshold", 0.05, &c.numeric
41
41
  config "show_params", "1", &MASCOT_SWITCH
42
42
  config "show_mods", "1", &MASCOT_SWITCH
43
43
  config "show_header", "1", &MASCOT_SWITCH
@@ -0,0 +1,66 @@
1
+ require 'nokogiri'
2
+
3
+ module Ms
4
+ module Mascot
5
+ module Formats
6
+ class Xml
7
+ module Utils
8
+ def hashify(nodes, cast=true)
9
+ nodes.inject({}) do |hash, node|
10
+ value = node.content
11
+ hash[node.name] = cast ? objectify(value) : value
12
+ hash
13
+ end
14
+ end
15
+
16
+ def objectify(value)
17
+ case value
18
+ when nil
19
+ nil
20
+ when /\A\d+(\.\d+)?\z/
21
+ $1 ? value.to_f : value.to_i
22
+ else
23
+ value
24
+ end
25
+ end
26
+ end
27
+
28
+ attr_reader :doc
29
+
30
+ def initialize(xml)
31
+ xml = xml.read if xml.respond_to?(:read)
32
+ xml = xml.sub(%q{xmlns="http://www.matrixscience.com/xmlns/schema/mascot_search_results_2"}, "")
33
+ @doc = Nokogiri::XML(xml)
34
+ end
35
+
36
+ def header
37
+ doc.at("/mascot_search_results/header")
38
+ end
39
+
40
+ def modifications
41
+ doc.xpath("/mascot_search_results/variable_mods/modification")
42
+ end
43
+
44
+ def search_parameters
45
+ doc.xpath("/mascot_search_results/search_parameters")
46
+ end
47
+
48
+ def format_parameters
49
+ doc.xpath("/mascot_search_results/format_parameters")
50
+ end
51
+
52
+ def hits
53
+ doc.xpath("/mascot_search_results/hits/hit")
54
+ end
55
+
56
+ def proteins(hit)
57
+ hit.xpath("protein")
58
+ end
59
+
60
+ def peptides(protein)
61
+ protein.xpath("peptide")
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -4,7 +4,7 @@ require 'ms/mascot/spectrum'
4
4
  module Ms
5
5
  module Mascot
6
6
 
7
- # :startdoc::manifest calculates a theoretical Mascot ms/ms spectrum
7
+ # :startdoc::task calculates a theoretical Mascot ms/ms spectrum
8
8
  #
9
9
  # Calculates the theoretical Mascot ms/ms spectrum for a peptide sequence.
10
10
  # A Mascot spectrum differs from the standard in-silico spectrum only in
@@ -23,7 +23,7 @@ module Ms
23
23
  # See Ms::Mascot::Spectrum for more details.
24
24
  class Fragment < InSilico::Fragment
25
25
 
26
- config :intensity, 1, &c.num_or_nil # a uniform intensity value
26
+ config :intensity, 1, &c.numeric_or_nil # a uniform intensity value
27
27
 
28
28
  # Generates some MGF-specific headers.
29
29
  def headers(spec)
@@ -0,0 +1,79 @@
1
+ require 'tap/tasks/load'
2
+ require 'ms/mascot/mgf/archive'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Load
7
+
8
+ # :startdoc::task loads entries from an mgf file
9
+ #
10
+ # Load entries from an mgf file. A selector may be specified to select
11
+ # only a subset of entries; by default all entries in the mgf file are
12
+ # returned.
13
+ #
14
+ # % tap run -- load/mgf --select 1 < MGF_FILE
15
+ # % tap run -- load/mgf --select 1..10 < MGF_FILE
16
+ #
17
+ # Entries are always returned as an array, even when the selection is
18
+ # for a single entry.
19
+ #
20
+ class Mgf < Tap::Tasks::Load
21
+ Archive = Ms::Mascot::Mgf::Archive
22
+
23
+ config :select, nil do |input| # An array selector for entries
24
+ if input.kind_of?(String)
25
+ input = "!ruby/range #{input}" if input =~ /\.{2,3}/
26
+ input = YAML.load(input)
27
+ end
28
+ c.validate(input, [nil, Integer, Range, Array])
29
+ end
30
+
31
+ nest :filter do
32
+ config :title, nil, &c.regexp_or_nil
33
+ config :charge, nil, &c.range_or_nil
34
+ config :pepmass, nil, &c.range_or_nil
35
+ config :n_ions, nil, &c.range_or_nil
36
+
37
+ def ok?(entry)
38
+ (!title || entry.title =~ title) &&
39
+ (!charge || charge.include?(entry.charge)) &&
40
+ (!pepmass || pepmass.include?(entry.pepmass)) &&
41
+ (!n_ions || n_ions.include?(entry.data.length))
42
+ end
43
+
44
+ def filter!(entries)
45
+ return entries unless title || charge || pepmass || n_ions
46
+ entries.select {|entry| ok?(entry) }
47
+ end
48
+ end
49
+
50
+ def open_io(input)
51
+ if input.kind_of?(String)
52
+ Archive.open(input) {|io| yield(io) }
53
+ else
54
+ super(input) do |io|
55
+ arc = Archive.new(io)
56
+ result = yield(arc)
57
+ arc.close
58
+ result
59
+ end
60
+ end
61
+ end
62
+
63
+ def load(arc)
64
+ case select
65
+ when Integer
66
+ entry = arc[select]
67
+ filter.ok?(entry) ? [entry] : []
68
+ when Range
69
+ filter.filter!(arc[select])
70
+ when Array
71
+ filter.filter!(arc[*select])
72
+ when nil
73
+ arc.select {|entry| filter.ok?(entry) }
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -136,9 +136,14 @@ module Ms
136
136
  (options[:headers] || headers.keys).each do |key|
137
137
  target << "#{key.upcase}=#{headers[key]}\n"
138
138
  end
139
-
140
- target << "CHARGE=#{charge_to_s}\n"
141
- target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
139
+
140
+ if charge
141
+ target << "CHARGE=#{charge_to_s}\n"
142
+ end
143
+
144
+ if pepmass
145
+ target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
146
+ end
142
147
 
143
148
  entry = data[0]
144
149
  data_format = case
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
3
3
 
4
4
  module Ms
5
5
  module Mascot
6
- # :startdoc::manifest submits a PMF or MS/MS search to Mascot
6
+ # :startdoc::task submits a PMF or MS/MS search to Mascot
7
7
  #
8
8
  # Submits a search request to Mascot using the mgf file and the search
9
9
  # parameters in a static config file. Correctly formatting search
@@ -46,37 +46,38 @@ module Ms
46
46
  config :uri, "http://www.matrixscience.com/cgi/nph-mascot.exe?1" # The uri of the mascot search site
47
47
 
48
48
  # Parameters for MS/MS searching of a human sample digested with trypsin
49
- nest :params do # The query parameters
50
- config "ErrTolRepeat", 0, &MASCOT_SWITCH
51
- config "PFA", 1, &MASCOT_SWITCH
52
- config "INSTRUMENT", "Default", &c.string
53
- config "REPTYPE", "peptide", &c.string
49
+ nest :params do # The query parameters
50
+ config "USERNAME", "Name", &c.string
51
+ config "USEREMAIL", '', &c.string
54
52
  config "COM", "Search Title", &c.string
53
+ config "INSTRUMENT", "Default", &c.string
55
54
  config "FORMAT", "Mascot generic", &c.string
56
- config "PEAK", "AUTO", &c.string
57
55
  config "CHARGE", "+2"
58
- config "INTERMEDIATE", "", &c.string
59
- config "SHOWALLMODS", "", &c.string
60
- config "PRECURSOR", "", &c.string
61
- config "USERNAME", "Name", &c.string
62
56
  config "TOLU", "ppm", &c.string
63
- config "USEREMAIL", '', &c.string
64
57
  config "CLE", "Trypsin", &c.string
65
- config "TOL", 100, &c.num
58
+ config "TOL", 100, &c.numeric
66
59
  config "ITOLU", "Da", &c.string
67
- config "QUANTITATION", "None", &c.string
68
- config "SEARCH", "MIS", &c.string
60
+ config "PFA", 1, &MASCOT_SWITCH
69
61
  config "DB", "SwissProt", &c.string
70
- config "PEP_ISOTOPE_ERROR", 0, &c.num
71
62
  config "ITOL", 0.6, &c.float
72
- config "FORMVER", 1.01, &c.float
73
63
  config "IT_MODS", [
74
64
  "Acetyl (Protein N-term)",
75
65
  "Gln->pyro-Glu (N-term Q)",
76
- "Oxidation (M)"], &c.list
66
+ "Oxidation (M)"
67
+ ], &c.list
77
68
  config "MASS", "Monoisotopic", &c.string
78
69
  config "REPORT", "AUTO", &c.string
79
70
  config "TAXONOMY", ". . . . . . . . . . . . . . . . Homo sapiens (human)", &c.string
71
+ config "INTERMEDIATE", "", &c.string
72
+ config "PRECURSOR", "", &c.string
73
+ config "QUANTITATION", "None", &c.string
74
+ config "PEP_ISOTOPE_ERROR", 0, &c.numeric
75
+ config "SEARCH", "MIS", :type => :hidden
76
+ config "PEAK", "AUTO", :type => :hidden
77
+ config "SHOWALLMODS", "", :type => :hidden
78
+ config "ErrTolRepeat", 0, :type => :hidden
79
+ config "REPTYPE", "peptide", :type => :hidden
80
+ config "FORMVER", 1.01, :type => :hidden
80
81
  end
81
82
 
82
83
  def process(mgf_file)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-mascot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Chiang
@@ -10,31 +10,31 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-03-31 00:00:00 -06:00
13
+ date: 2009-05-25 00:00:00 -06:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: tap
17
+ name: tap-mechanize
18
18
  type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: 0.12.4
24
+ version: 0.6.0
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
- name: tap-mechanize
27
+ name: external
28
28
  type: :runtime
29
29
  version_requirement:
30
30
  version_requirements: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: 0.5.1
34
+ version: 0.3.0
35
35
  version:
36
36
  - !ruby/object:Gem::Dependency
37
- name: external
37
+ name: ms-in_silico
38
38
  type: :runtime
39
39
  version_requirement:
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,14 +44,24 @@ dependencies:
44
44
  version: 0.3.0
45
45
  version:
46
46
  - !ruby/object:Gem::Dependency
47
- name: ms-in_silico
48
- type: :runtime
47
+ name: minitest
48
+ type: :development
49
+ version_requirement:
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.3.1
55
+ version:
56
+ - !ruby/object:Gem::Dependency
57
+ name: tap-test
58
+ type: :development
49
59
  version_requirement:
50
60
  version_requirements: !ruby/object:Gem::Requirement
51
61
  requirements:
52
62
  - - ">="
53
63
  - !ruby/object:Gem::Version
54
- version: 0.2.3
64
+ version: 0.1.0
55
65
  version:
56
66
  description:
57
67
  email: simon.a.chiang@gmail.com
@@ -77,9 +87,11 @@ files:
77
87
  - lib/ms/mascot/dat/section.rb
78
88
  - lib/ms/mascot/dat/summary.rb
79
89
  - lib/ms/mascot/dat/summary/id.rb
90
+ - lib/ms/mascot/dump/mgf.rb
80
91
  - lib/ms/mascot/export.rb
81
- - lib/ms/mascot/format_mgf.rb
92
+ - lib/ms/mascot/formats/xml.rb
82
93
  - lib/ms/mascot/fragment.rb
94
+ - lib/ms/mascot/load/mgf.rb
83
95
  - lib/ms/mascot/mgf.rb
84
96
  - lib/ms/mascot/mgf/archive.rb
85
97
  - lib/ms/mascot/mgf/entry.rb
@@ -1,53 +0,0 @@
1
- require 'ms/mascot/mgf/entry'
2
-
3
- module Ms
4
- module Mascot
5
- # :startdoc::manifest formats an fragment spectrum as mgf
6
- #
7
- # Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
8
- # configurations specify various details of the dump, including the
9
- # precision and default headers.
10
- #
11
- # % tap run -- fragment TVQQEL --:s format_mgf
12
- #
13
- # (note: be sure to use the splat option on the join)
14
- #
15
- class FormatMgf < Tap::Task
16
-
17
- config :default_headers, {}, &c.hash # A hash of default headers
18
- config :mz_precision, 6, &c.integer # The precision of mzs
19
- config :intensity_precision, 0, &c.integer # The precision of intensities
20
- config :pepmass_precision, 6, &c.integer # The precision of peptide mass
21
-
22
- config :prefix, nil, &c.string_or_nil # An optional prefix
23
- config :suffix, "\n", &c.string_or_nil # An optional suffix
24
-
25
- # Maps header keys (typically output by a fragment task)
26
- # to Mgf::Entry header strings.
27
- HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
28
-
29
- def process(data, headers)
30
- lines = []
31
- lines << prefix if prefix
32
-
33
- mgf_headers = format_headers(headers)
34
- Ms::Mascot::Mgf::Entry.new(mgf_headers, data).dump(lines, config)
35
-
36
- lines << suffix if suffix
37
- lines.join("")
38
- end
39
-
40
- protected
41
-
42
- # helper to format the headers properly for an mgf entry
43
- def format_headers(headers) # :nodoc:
44
- mgf_headers = {}
45
- default_headers.merge(headers).each_pair do |key, value|
46
- key = HEADER_MAP[key] || key.to_s.upcase
47
- mgf_headers[key] = value
48
- end
49
- mgf_headers
50
- end
51
- end
52
- end
53
- end