ms-mascot 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.3.0 / 2009-05-25
2
+
3
+ Updated to utilize Tap-0.17.0
4
+
5
+ * reworked format_mgf as a dump task (dump/mgf)
6
+ * added a load/mgf task
7
+ * added xml format for accessing results
8
+
1
9
  == 0.2.2 / 2009-03-31
2
10
 
3
11
  * updates to use latest tap
@@ -0,0 +1,64 @@
1
+ require 'tap/tasks/dump'
2
+ require 'ms/mascot/mgf/entry'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Dump
7
+ # :startdoc::task dumps a fragment spectrum as mgf
8
+ #
9
+ # Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
10
+ # configurations specify various details of the dump, including the
11
+ # precision and default headers.
12
+ #
13
+ # % tap run -- fragment TVQQEL --: dump/mgf
14
+ #
15
+ # :startdoc::task-
16
+ #
17
+ # Examples:
18
+ #
19
+ # # reformat an mgf file (inefficient but works)
20
+ # % rap load/mgf --file OLD_FILE --:i dump/mgf --mz-precision 2 > NEW_FILE
21
+ #
22
+ class Mgf < Tap::Tasks::Dump
23
+
24
+ config :default_headers, {}, &c.hash # A hash of default headers
25
+ config :mz_precision, 6, &c.integer # The precision of mzs
26
+ config :intensity_precision, 0, &c.integer # The precision of intensities
27
+ config :pepmass_precision, 6, &c.integer # The precision of peptide mass
28
+
29
+ config :prefix, nil, &c.string_or_nil # An optional prefix
30
+ config :suffix, "\n", &c.string_or_nil # An optional suffix
31
+
32
+ # Maps common variations of header keys (typically output
33
+ # by a fragment task) to Mgf::Entry header strings.
34
+ HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
35
+
36
+ # Dumps the object to io in mgf format.
37
+ def dump(obj, io)
38
+ unless obj.kind_of?(Ms::Mascot::Mgf::Entry)
39
+ data, headers = obj
40
+ mgf_headers = format_headers(headers)
41
+ obj = Ms::Mascot::Mgf::Entry.new(mgf_headers, data)
42
+ end
43
+
44
+ io << prefix if prefix
45
+ obj.dump(io, config)
46
+ io << suffix if suffix
47
+ end
48
+
49
+ protected
50
+
51
+ # helper to format the headers properly for an mgf entry
52
+ def format_headers(headers) # :nodoc:
53
+ headers ||= {}
54
+ mgf_headers = {}
55
+ default_headers.merge(headers).each_pair do |key, value|
56
+ key = HEADER_MAP[key] || key.to_s.upcase
57
+ mgf_headers[key] = value
58
+ end
59
+ mgf_headers
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
3
3
 
4
4
  module Ms
5
5
  module Mascot
6
- # :startdoc::manifest exports results from a search
6
+ # :startdoc::task exports results from a search
7
7
  class Export < Tap::Mechanize::Submit
8
8
  include Validation
9
9
 
@@ -15,7 +15,7 @@ module Ms
15
15
  config "pep_expect", "1", &MASCOT_SWITCH
16
16
  config "prot_mass", "1", &MASCOT_SWITCH
17
17
  config "protein_master", "1", &MASCOT_SWITCH
18
- config "_server_mudpit_switch", 0.000000001, &c.num
18
+ config "_server_mudpit_switch", 0.000000001, &c.numeric
19
19
  config "pep_exp_mz", "1", &MASCOT_SWITCH
20
20
  config "do_export", "1", &MASCOT_SWITCH
21
21
  config "pep_delta", "1", &MASCOT_SWITCH
@@ -37,7 +37,7 @@ module Ms
37
37
  config "_showallfromerrortolerant", ""
38
38
  config "prot_hit_num", "1", &MASCOT_SWITCH
39
39
  config "search_master", "1", &MASCOT_SWITCH
40
- config "_sigthreshold", 0.05, &c.num
40
+ config "_sigthreshold", 0.05, &c.numeric
41
41
  config "show_params", "1", &MASCOT_SWITCH
42
42
  config "show_mods", "1", &MASCOT_SWITCH
43
43
  config "show_header", "1", &MASCOT_SWITCH
@@ -0,0 +1,66 @@
1
+ require 'nokogiri'
2
+
3
+ module Ms
4
+ module Mascot
5
+ module Formats
6
+ class Xml
7
+ module Utils
8
+ def hashify(nodes, cast=true)
9
+ nodes.inject({}) do |hash, node|
10
+ value = node.content
11
+ hash[node.name] = cast ? objectify(value) : value
12
+ hash
13
+ end
14
+ end
15
+
16
+ def objectify(value)
17
+ case value
18
+ when nil
19
+ nil
20
+ when /\A\d+(\.\d+)?\z/
21
+ $1 ? value.to_f : value.to_i
22
+ else
23
+ value
24
+ end
25
+ end
26
+ end
27
+
28
+ attr_reader :doc
29
+
30
+ def initialize(xml)
31
+ xml = xml.read if xml.respond_to?(:read)
32
+ xml = xml.sub(%q{xmlns="http://www.matrixscience.com/xmlns/schema/mascot_search_results_2"}, "")
33
+ @doc = Nokogiri::XML(xml)
34
+ end
35
+
36
+ def header
37
+ doc.at("/mascot_search_results/header")
38
+ end
39
+
40
+ def modifications
41
+ doc.xpath("/mascot_search_results/variable_mods/modification")
42
+ end
43
+
44
+ def search_parameters
45
+ doc.xpath("/mascot_search_results/search_parameters")
46
+ end
47
+
48
+ def format_parameters
49
+ doc.xpath("/mascot_search_results/format_parameters")
50
+ end
51
+
52
+ def hits
53
+ doc.xpath("/mascot_search_results/hits/hit")
54
+ end
55
+
56
+ def proteins(hit)
57
+ hit.xpath("protein")
58
+ end
59
+
60
+ def peptides(protein)
61
+ protein.xpath("peptide")
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -4,7 +4,7 @@ require 'ms/mascot/spectrum'
4
4
  module Ms
5
5
  module Mascot
6
6
 
7
- # :startdoc::manifest calculates a theoretical Mascot ms/ms spectrum
7
+ # :startdoc::task calculates a theoretical Mascot ms/ms spectrum
8
8
  #
9
9
  # Calculates the theoretical Mascot ms/ms spectrum for a peptide sequence.
10
10
  # A Mascot spectrum differs from the standard in-silico spectrum only in
@@ -23,7 +23,7 @@ module Ms
23
23
  # See Ms::Mascot::Spectrum for more details.
24
24
  class Fragment < InSilico::Fragment
25
25
 
26
- config :intensity, 1, &c.num_or_nil # a uniform intensity value
26
+ config :intensity, 1, &c.numeric_or_nil # a uniform intensity value
27
27
 
28
28
  # Generates some MGF-specific headers.
29
29
  def headers(spec)
@@ -0,0 +1,79 @@
1
+ require 'tap/tasks/load'
2
+ require 'ms/mascot/mgf/archive'
3
+
4
+ module Ms
5
+ module Mascot
6
+ module Load
7
+
8
+ # :startdoc::task loads entries from an mgf file
9
+ #
10
+ # Load entries from an mgf file. A selector may be specified to select
11
+ # only a subset of entries; by default all entries in the mgf file are
12
+ # returned.
13
+ #
14
+ # % tap run -- load/mgf --select 1 < MGF_FILE
15
+ # % tap run -- load/mgf --select 1..10 < MGF_FILE
16
+ #
17
+ # Entries are always returned as an array, even when the selection is
18
+ # for a single entry.
19
+ #
20
+ class Mgf < Tap::Tasks::Load
21
+ Archive = Ms::Mascot::Mgf::Archive
22
+
23
+ config :select, nil do |input| # An array selector for entries
24
+ if input.kind_of?(String)
25
+ input = "!ruby/range #{input}" if input =~ /\.{2,3}/
26
+ input = YAML.load(input)
27
+ end
28
+ c.validate(input, [nil, Integer, Range, Array])
29
+ end
30
+
31
+ nest :filter do
32
+ config :title, nil, &c.regexp_or_nil
33
+ config :charge, nil, &c.range_or_nil
34
+ config :pepmass, nil, &c.range_or_nil
35
+ config :n_ions, nil, &c.range_or_nil
36
+
37
+ def ok?(entry)
38
+ (!title || entry.title =~ title) &&
39
+ (!charge || charge.include?(entry.charge)) &&
40
+ (!pepmass || pepmass.include?(entry.pepmass)) &&
41
+ (!n_ions || n_ions.include?(entry.data.length))
42
+ end
43
+
44
+ def filter!(entries)
45
+ return entries unless title || charge || pepmass || n_ions
46
+ entries.select {|entry| ok?(entry) }
47
+ end
48
+ end
49
+
50
+ def open_io(input)
51
+ if input.kind_of?(String)
52
+ Archive.open(input) {|io| yield(io) }
53
+ else
54
+ super(input) do |io|
55
+ arc = Archive.new(io)
56
+ result = yield(arc)
57
+ arc.close
58
+ result
59
+ end
60
+ end
61
+ end
62
+
63
+ def load(arc)
64
+ case select
65
+ when Integer
66
+ entry = arc[select]
67
+ filter.ok?(entry) ? [entry] : []
68
+ when Range
69
+ filter.filter!(arc[select])
70
+ when Array
71
+ filter.filter!(arc[*select])
72
+ when nil
73
+ arc.select {|entry| filter.ok?(entry) }
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -136,9 +136,14 @@ module Ms
136
136
  (options[:headers] || headers.keys).each do |key|
137
137
  target << "#{key.upcase}=#{headers[key]}\n"
138
138
  end
139
-
140
- target << "CHARGE=#{charge_to_s}\n"
141
- target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
139
+
140
+ if charge
141
+ target << "CHARGE=#{charge_to_s}\n"
142
+ end
143
+
144
+ if pepmass
145
+ target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
146
+ end
142
147
 
143
148
  entry = data[0]
144
149
  data_format = case
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
3
3
 
4
4
  module Ms
5
5
  module Mascot
6
- # :startdoc::manifest submits a PMF or MS/MS search to Mascot
6
+ # :startdoc::task submits a PMF or MS/MS search to Mascot
7
7
  #
8
8
  # Submits a search request to Mascot using the mgf file and the search
9
9
  # parameters in a static config file. Correctly formatting search
@@ -46,37 +46,38 @@ module Ms
46
46
  config :uri, "http://www.matrixscience.com/cgi/nph-mascot.exe?1" # The uri of the mascot search site
47
47
 
48
48
  # Parameters for MS/MS searching of a human sample digested with trypsin
49
- nest :params do # The query parameters
50
- config "ErrTolRepeat", 0, &MASCOT_SWITCH
51
- config "PFA", 1, &MASCOT_SWITCH
52
- config "INSTRUMENT", "Default", &c.string
53
- config "REPTYPE", "peptide", &c.string
49
+ nest :params do # The query parameters
50
+ config "USERNAME", "Name", &c.string
51
+ config "USEREMAIL", '', &c.string
54
52
  config "COM", "Search Title", &c.string
53
+ config "INSTRUMENT", "Default", &c.string
55
54
  config "FORMAT", "Mascot generic", &c.string
56
- config "PEAK", "AUTO", &c.string
57
55
  config "CHARGE", "+2"
58
- config "INTERMEDIATE", "", &c.string
59
- config "SHOWALLMODS", "", &c.string
60
- config "PRECURSOR", "", &c.string
61
- config "USERNAME", "Name", &c.string
62
56
  config "TOLU", "ppm", &c.string
63
- config "USEREMAIL", '', &c.string
64
57
  config "CLE", "Trypsin", &c.string
65
- config "TOL", 100, &c.num
58
+ config "TOL", 100, &c.numeric
66
59
  config "ITOLU", "Da", &c.string
67
- config "QUANTITATION", "None", &c.string
68
- config "SEARCH", "MIS", &c.string
60
+ config "PFA", 1, &MASCOT_SWITCH
69
61
  config "DB", "SwissProt", &c.string
70
- config "PEP_ISOTOPE_ERROR", 0, &c.num
71
62
  config "ITOL", 0.6, &c.float
72
- config "FORMVER", 1.01, &c.float
73
63
  config "IT_MODS", [
74
64
  "Acetyl (Protein N-term)",
75
65
  "Gln->pyro-Glu (N-term Q)",
76
- "Oxidation (M)"], &c.list
66
+ "Oxidation (M)"
67
+ ], &c.list
77
68
  config "MASS", "Monoisotopic", &c.string
78
69
  config "REPORT", "AUTO", &c.string
79
70
  config "TAXONOMY", ". . . . . . . . . . . . . . . . Homo sapiens (human)", &c.string
71
+ config "INTERMEDIATE", "", &c.string
72
+ config "PRECURSOR", "", &c.string
73
+ config "QUANTITATION", "None", &c.string
74
+ config "PEP_ISOTOPE_ERROR", 0, &c.numeric
75
+ config "SEARCH", "MIS", :type => :hidden
76
+ config "PEAK", "AUTO", :type => :hidden
77
+ config "SHOWALLMODS", "", :type => :hidden
78
+ config "ErrTolRepeat", 0, :type => :hidden
79
+ config "REPTYPE", "peptide", :type => :hidden
80
+ config "FORMVER", 1.01, :type => :hidden
80
81
  end
81
82
 
82
83
  def process(mgf_file)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-mascot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Chiang
@@ -10,31 +10,31 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-03-31 00:00:00 -06:00
13
+ date: 2009-05-25 00:00:00 -06:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: tap
17
+ name: tap-mechanize
18
18
  type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: 0.12.4
24
+ version: 0.6.0
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
- name: tap-mechanize
27
+ name: external
28
28
  type: :runtime
29
29
  version_requirement:
30
30
  version_requirements: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: 0.5.1
34
+ version: 0.3.0
35
35
  version:
36
36
  - !ruby/object:Gem::Dependency
37
- name: external
37
+ name: ms-in_silico
38
38
  type: :runtime
39
39
  version_requirement:
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,14 +44,24 @@ dependencies:
44
44
  version: 0.3.0
45
45
  version:
46
46
  - !ruby/object:Gem::Dependency
47
- name: ms-in_silico
48
- type: :runtime
47
+ name: minitest
48
+ type: :development
49
+ version_requirement:
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.3.1
55
+ version:
56
+ - !ruby/object:Gem::Dependency
57
+ name: tap-test
58
+ type: :development
49
59
  version_requirement:
50
60
  version_requirements: !ruby/object:Gem::Requirement
51
61
  requirements:
52
62
  - - ">="
53
63
  - !ruby/object:Gem::Version
54
- version: 0.2.3
64
+ version: 0.1.0
55
65
  version:
56
66
  description:
57
67
  email: simon.a.chiang@gmail.com
@@ -77,9 +87,11 @@ files:
77
87
  - lib/ms/mascot/dat/section.rb
78
88
  - lib/ms/mascot/dat/summary.rb
79
89
  - lib/ms/mascot/dat/summary/id.rb
90
+ - lib/ms/mascot/dump/mgf.rb
80
91
  - lib/ms/mascot/export.rb
81
- - lib/ms/mascot/format_mgf.rb
92
+ - lib/ms/mascot/formats/xml.rb
82
93
  - lib/ms/mascot/fragment.rb
94
+ - lib/ms/mascot/load/mgf.rb
83
95
  - lib/ms/mascot/mgf.rb
84
96
  - lib/ms/mascot/mgf/archive.rb
85
97
  - lib/ms/mascot/mgf/entry.rb
@@ -1,53 +0,0 @@
1
- require 'ms/mascot/mgf/entry'
2
-
3
- module Ms
4
- module Mascot
5
- # :startdoc::manifest formats an fragment spectrum as mgf
6
- #
7
- # Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
8
- # configurations specify various details of the dump, including the
9
- # precision and default headers.
10
- #
11
- # % tap run -- fragment TVQQEL --:s format_mgf
12
- #
13
- # (note: be sure to use the splat option on the join)
14
- #
15
- class FormatMgf < Tap::Task
16
-
17
- config :default_headers, {}, &c.hash # A hash of default headers
18
- config :mz_precision, 6, &c.integer # The precision of mzs
19
- config :intensity_precision, 0, &c.integer # The precision of intensities
20
- config :pepmass_precision, 6, &c.integer # The precision of peptide mass
21
-
22
- config :prefix, nil, &c.string_or_nil # An optional prefix
23
- config :suffix, "\n", &c.string_or_nil # An optional suffix
24
-
25
- # Maps header keys (typically output by a fragment task)
26
- # to Mgf::Entry header strings.
27
- HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
28
-
29
- def process(data, headers)
30
- lines = []
31
- lines << prefix if prefix
32
-
33
- mgf_headers = format_headers(headers)
34
- Ms::Mascot::Mgf::Entry.new(mgf_headers, data).dump(lines, config)
35
-
36
- lines << suffix if suffix
37
- lines.join("")
38
- end
39
-
40
- protected
41
-
42
- # helper to format the headers properly for an mgf entry
43
- def format_headers(headers) # :nodoc:
44
- mgf_headers = {}
45
- default_headers.merge(headers).each_pair do |key, value|
46
- key = HEADER_MAP[key] || key.to_s.upcase
47
- mgf_headers[key] = value
48
- end
49
- mgf_headers
50
- end
51
- end
52
- end
53
- end