ms-mascot 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +8 -0
- data/lib/ms/mascot/dump/mgf.rb +64 -0
- data/lib/ms/mascot/export.rb +3 -3
- data/lib/ms/mascot/formats/xml.rb +66 -0
- data/lib/ms/mascot/fragment.rb +2 -2
- data/lib/ms/mascot/load/mgf.rb +79 -0
- data/lib/ms/mascot/mgf/entry.rb +8 -3
- data/lib/ms/mascot/submit.rb +19 -18
- metadata +23 -11
- data/lib/ms/mascot/format_mgf.rb +0 -53
data/History
CHANGED
data/lib/ms/mascot/dump/mgf.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'tap/tasks/dump'
|
2
|
+
require 'ms/mascot/mgf/entry'
|
3
|
+
|
4
|
+
module Ms
|
5
|
+
module Mascot
|
6
|
+
module Dump
|
7
|
+
# :startdoc::task dumps a fragment spectrum as mgf
|
8
|
+
#
|
9
|
+
# Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
|
10
|
+
# configurations specify various details of the dump, including the
|
11
|
+
# precision and default headers.
|
12
|
+
#
|
13
|
+
# % tap run -- fragment TVQQEL --: dump/mgf
|
14
|
+
#
|
15
|
+
# :startdoc::task-
|
16
|
+
#
|
17
|
+
# Examples:
|
18
|
+
#
|
19
|
+
# # reformat an mgf file (inefficient but works)
|
20
|
+
# % rap load/mgf --file OLD_FILE --:i dump/mgf --mz-precision 2 > NEW_FILE
|
21
|
+
#
|
22
|
+
class Mgf < Tap::Tasks::Dump
|
23
|
+
|
24
|
+
config :default_headers, {}, &c.hash # A hash of default headers
|
25
|
+
config :mz_precision, 6, &c.integer # The precision of mzs
|
26
|
+
config :intensity_precision, 0, &c.integer # The precision of intensities
|
27
|
+
config :pepmass_precision, 6, &c.integer # The precision of peptide mass
|
28
|
+
|
29
|
+
config :prefix, nil, &c.string_or_nil # An optional prefix
|
30
|
+
config :suffix, "\n", &c.string_or_nil # An optional suffix
|
31
|
+
|
32
|
+
# Maps common variations of header keys (typically output
|
33
|
+
# by a fragment task) to Mgf::Entry header strings.
|
34
|
+
HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
|
35
|
+
|
36
|
+
# Dumps the object to io as mgf.
|
37
|
+
def dump(obj, io)
|
38
|
+
unless obj.kind_of?(Ms::Mascot::Mgf::Entry)
|
39
|
+
data, headers = obj
|
40
|
+
mgf_headers = format_headers(headers)
|
41
|
+
obj = Ms::Mascot::Mgf::Entry.new(mgf_headers, data)
|
42
|
+
end
|
43
|
+
|
44
|
+
io << prefix if prefix
|
45
|
+
obj.dump(io, config)
|
46
|
+
io << suffix if suffix
|
47
|
+
end
|
48
|
+
|
49
|
+
protected
|
50
|
+
|
51
|
+
# helper to format the headers properly for an mgf entry
|
52
|
+
def format_headers(headers) # :nodoc:
|
53
|
+
headers ||= {}
|
54
|
+
mgf_headers = {}
|
55
|
+
default_headers.merge(headers).each_pair do |key, value|
|
56
|
+
key = HEADER_MAP[key] || key.to_s.upcase
|
57
|
+
mgf_headers[key] = value
|
58
|
+
end
|
59
|
+
mgf_headers
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/ms/mascot/export.rb
CHANGED
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
|
|
3
3
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
|
-
# :startdoc::
|
6
|
+
# :startdoc::task exports results from a search
|
7
7
|
class Export < Tap::Mechanize::Submit
|
8
8
|
include Validation
|
9
9
|
|
@@ -15,7 +15,7 @@ module Ms
|
|
15
15
|
config "pep_expect", "1", &MASCOT_SWITCH
|
16
16
|
config "prot_mass", "1", &MASCOT_SWITCH
|
17
17
|
config "protein_master", "1", &MASCOT_SWITCH
|
18
|
-
config "_server_mudpit_switch", 0.000000001, &c.
|
18
|
+
config "_server_mudpit_switch", 0.000000001, &c.numeric
|
19
19
|
config "pep_exp_mz", "1", &MASCOT_SWITCH
|
20
20
|
config "do_export", "1", &MASCOT_SWITCH
|
21
21
|
config "pep_delta", "1", &MASCOT_SWITCH
|
@@ -37,7 +37,7 @@ module Ms
|
|
37
37
|
config "_showallfromerrortolerant", ""
|
38
38
|
config "prot_hit_num", "1", &MASCOT_SWITCH
|
39
39
|
config "search_master", "1", &MASCOT_SWITCH
|
40
|
-
config "_sigthreshold", 0.05, &c.
|
40
|
+
config "_sigthreshold", 0.05, &c.numeric
|
41
41
|
config "show_params", "1", &MASCOT_SWITCH
|
42
42
|
config "show_mods", "1", &MASCOT_SWITCH
|
43
43
|
config "show_header", "1", &MASCOT_SWITCH
|
data/lib/ms/mascot/formats/xml.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Formats
|
6
|
+
class Xml
|
7
|
+
module Utils
|
8
|
+
def hashify(nodes, cast=true)
|
9
|
+
nodes.inject({}) do |hash, node|
|
10
|
+
value = node.content
|
11
|
+
hash[node.name] = cast ? objectify(value) : value
|
12
|
+
hash
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def objectify(value)
|
17
|
+
case value
|
18
|
+
when nil
|
19
|
+
nil
|
20
|
+
when /\A\d+(\.\d+)?\z/
|
21
|
+
$1 ? value.to_f : value.to_i
|
22
|
+
else
|
23
|
+
value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_reader :doc
|
29
|
+
|
30
|
+
def initialize(xml)
|
31
|
+
xml = xml.read if xml.respond_to?(:read)
|
32
|
+
xml = xml.sub(%q{xmlns="http://www.matrixscience.com/xmlns/schema/mascot_search_results_2"}, "")
|
33
|
+
@doc = Nokogiri::XML(xml)
|
34
|
+
end
|
35
|
+
|
36
|
+
def header
|
37
|
+
doc.at("/mascot_search_results/header")
|
38
|
+
end
|
39
|
+
|
40
|
+
def modifications
|
41
|
+
doc.xpath("/mascot_search_results/variable_mods/modification")
|
42
|
+
end
|
43
|
+
|
44
|
+
def search_parameters
|
45
|
+
doc.xpath("/mascot_search_results/search_parameters")
|
46
|
+
end
|
47
|
+
|
48
|
+
def format_parameters
|
49
|
+
doc.xpath("/mascot_search_results/format_parameters")
|
50
|
+
end
|
51
|
+
|
52
|
+
def hits
|
53
|
+
doc.xpath("/mascot_search_results/hits/hit")
|
54
|
+
end
|
55
|
+
|
56
|
+
def proteins(hit)
|
57
|
+
hit.xpath("protein")
|
58
|
+
end
|
59
|
+
|
60
|
+
def peptides(protein)
|
61
|
+
protein.xpath("peptide")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/ms/mascot/fragment.rb
CHANGED
@@ -4,7 +4,7 @@ require 'ms/mascot/spectrum'
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
6
|
|
7
|
-
# :startdoc::
|
7
|
+
# :startdoc::task calculates a theoretical Mascot ms/ms spectrum
|
8
8
|
#
|
9
9
|
# Calculates the theoretical Mascot ms/ms spectrum for a peptide sequence.
|
10
10
|
# A Mascot spectrum differs from the standard in-silico spectrum only in
|
@@ -23,7 +23,7 @@ module Ms
|
|
23
23
|
# See Ms::Mascot::Spectrum for more details.
|
24
24
|
class Fragment < InSilico::Fragment
|
25
25
|
|
26
|
-
config :intensity, 1, &c.
|
26
|
+
config :intensity, 1, &c.numeric_or_nil # a uniform intensity value
|
27
27
|
|
28
28
|
# Generates some MGF-specific headers.
|
29
29
|
def headers(spec)
|
data/lib/ms/mascot/load/mgf.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'tap/tasks/load'
|
2
|
+
require 'ms/mascot/mgf/archive'
|
3
|
+
|
4
|
+
module Ms
|
5
|
+
module Mascot
|
6
|
+
module Load
|
7
|
+
|
8
|
+
# :startdoc::task loads entries from an mgf file
|
9
|
+
#
|
10
|
+
# Load entries from an mgf file. A selector may be specified to select
|
11
|
+
# only a subset of entries; by default all entries in the mgf file are
|
12
|
+
# returned.
|
13
|
+
#
|
14
|
+
# % tap run -- load/mgf --select 1 < MGF_FILE
|
15
|
+
# % tap run -- load/mgf --select 1..10 < MGF_FILE
|
16
|
+
#
|
17
|
+
# Entries are always returned as an array, even when the selection is
|
18
|
+
# for a single entry.
|
19
|
+
#
|
20
|
+
class Mgf < Tap::Tasks::Load
|
21
|
+
Archive = Ms::Mascot::Mgf::Archive
|
22
|
+
|
23
|
+
config :select, nil do |input| # An array selector for entries
|
24
|
+
if input.kind_of?(String)
|
25
|
+
input = "!ruby/range #{input}" if input =~ /\.{2,3}/
|
26
|
+
input = YAML.load(input)
|
27
|
+
end
|
28
|
+
c.validate(input, [nil, Integer, Range, Array])
|
29
|
+
end
|
30
|
+
|
31
|
+
nest :filter do
|
32
|
+
config :title, nil, &c.regexp_or_nil
|
33
|
+
config :charge, nil, &c.range_or_nil
|
34
|
+
config :pepmass, nil, &c.range_or_nil
|
35
|
+
config :n_ions, nil, &c.range_or_nil
|
36
|
+
|
37
|
+
def ok?(entry)
|
38
|
+
(!title || entry.title =~ title) &&
|
39
|
+
(!charge || charge.include?(entry.charge)) &&
|
40
|
+
(!pepmass || pepmass.include?(entry.pepmass)) &&
|
41
|
+
(!n_ions || n_ions.include?(entry.data.length))
|
42
|
+
end
|
43
|
+
|
44
|
+
def filter!(entries)
|
45
|
+
return entries unless title || charge || pepmass || n_ions
|
46
|
+
entries.select {|entry| ok?(entry) }
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def open_io(input)
|
51
|
+
if input.kind_of?(String)
|
52
|
+
Archive.open(input) {|io| yield(io) }
|
53
|
+
else
|
54
|
+
super(input) do |io|
|
55
|
+
arc = Archive.new(io)
|
56
|
+
result = yield(arc)
|
57
|
+
arc.close
|
58
|
+
result
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def load(arc)
|
64
|
+
case select
|
65
|
+
when Integer
|
66
|
+
entry = arc[select]
|
67
|
+
filter.ok?(entry) ? [entry] : []
|
68
|
+
when Range
|
69
|
+
filter.filter!(arc[select])
|
70
|
+
when Array
|
71
|
+
filter.filter!(arc[*select])
|
72
|
+
when nil
|
73
|
+
arc.select {|entry| filter.ok?(entry) }
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/lib/ms/mascot/mgf/entry.rb
CHANGED
@@ -136,9 +136,14 @@ module Ms
|
|
136
136
|
(options[:headers] || headers.keys).each do |key|
|
137
137
|
target << "#{key.upcase}=#{headers[key]}\n"
|
138
138
|
end
|
139
|
-
|
140
|
-
|
141
|
-
|
139
|
+
|
140
|
+
if charge
|
141
|
+
target << "CHARGE=#{charge_to_s}\n"
|
142
|
+
end
|
143
|
+
|
144
|
+
if pepmass
|
145
|
+
target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
|
146
|
+
end
|
142
147
|
|
143
148
|
entry = data[0]
|
144
149
|
data_format = case
|
data/lib/ms/mascot/submit.rb
CHANGED
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
|
|
3
3
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
|
-
# :startdoc::
|
6
|
+
# :startdoc::task submits a PMF or MS/MS search to Mascot
|
7
7
|
#
|
8
8
|
# Submits a search request to Mascot using the mgf file and the search
|
9
9
|
# parameters in a static config file. Correctly formatting search
|
@@ -46,37 +46,38 @@ module Ms
|
|
46
46
|
config :uri, "http://www.matrixscience.com/cgi/nph-mascot.exe?1" # The uri of the mascot search site
|
47
47
|
|
48
48
|
# Parameters for MS/MS searching of a human sample digested with trypsin
|
49
|
-
nest :params do
|
50
|
-
config "
|
51
|
-
config "
|
52
|
-
config "INSTRUMENT", "Default", &c.string
|
53
|
-
config "REPTYPE", "peptide", &c.string
|
49
|
+
nest :params do # The query parameters
|
50
|
+
config "USERNAME", "Name", &c.string
|
51
|
+
config "USEREMAIL", '', &c.string
|
54
52
|
config "COM", "Search Title", &c.string
|
53
|
+
config "INSTRUMENT", "Default", &c.string
|
55
54
|
config "FORMAT", "Mascot generic", &c.string
|
56
|
-
config "PEAK", "AUTO", &c.string
|
57
55
|
config "CHARGE", "+2"
|
58
|
-
config "INTERMEDIATE", "", &c.string
|
59
|
-
config "SHOWALLMODS", "", &c.string
|
60
|
-
config "PRECURSOR", "", &c.string
|
61
|
-
config "USERNAME", "Name", &c.string
|
62
56
|
config "TOLU", "ppm", &c.string
|
63
|
-
config "USEREMAIL", '', &c.string
|
64
57
|
config "CLE", "Trypsin", &c.string
|
65
|
-
config "TOL", 100, &c.
|
58
|
+
config "TOL", 100, &c.numeric
|
66
59
|
config "ITOLU", "Da", &c.string
|
67
|
-
config "
|
68
|
-
config "SEARCH", "MIS", &c.string
|
60
|
+
config "PFA", 1, &MASCOT_SWITCH
|
69
61
|
config "DB", "SwissProt", &c.string
|
70
|
-
config "PEP_ISOTOPE_ERROR", 0, &c.num
|
71
62
|
config "ITOL", 0.6, &c.float
|
72
|
-
config "FORMVER", 1.01, &c.float
|
73
63
|
config "IT_MODS", [
|
74
64
|
"Acetyl (Protein N-term)",
|
75
65
|
"Gln->pyro-Glu (N-term Q)",
|
76
|
-
"Oxidation (M)"
|
66
|
+
"Oxidation (M)"
|
67
|
+
], &c.list
|
77
68
|
config "MASS", "Monoisotopic", &c.string
|
78
69
|
config "REPORT", "AUTO", &c.string
|
79
70
|
config "TAXONOMY", ". . . . . . . . . . . . . . . . Homo sapiens (human)", &c.string
|
71
|
+
config "INTERMEDIATE", "", &c.string
|
72
|
+
config "PRECURSOR", "", &c.string
|
73
|
+
config "QUANTITATION", "None", &c.string
|
74
|
+
config "PEP_ISOTOPE_ERROR", 0, &c.numeric
|
75
|
+
config "SEARCH", "MIS", :type => :hidden
|
76
|
+
config "PEAK", "AUTO", :type => :hidden
|
77
|
+
config "SHOWALLMODS", "", :type => :hidden
|
78
|
+
config "ErrTolRepeat", 0, :type => :hidden
|
79
|
+
config "REPTYPE", "peptide", :type => :hidden
|
80
|
+
config "FORMVER", 1.01, :type => :hidden
|
80
81
|
end
|
81
82
|
|
82
83
|
def process(mgf_file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-mascot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Chiang
|
@@ -10,31 +10,31 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-05-25 00:00:00 -06:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name: tap
|
17
|
+
name: tap-mechanize
|
18
18
|
type: :runtime
|
19
19
|
version_requirement:
|
20
20
|
version_requirements: !ruby/object:Gem::Requirement
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 0.
|
24
|
+
version: 0.6.0
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name:
|
27
|
+
name: external
|
28
28
|
type: :runtime
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 0.
|
34
|
+
version: 0.3.0
|
35
35
|
version:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
37
|
+
name: ms-in_silico
|
38
38
|
type: :runtime
|
39
39
|
version_requirement:
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,14 +44,24 @@ dependencies:
|
|
44
44
|
version: 0.3.0
|
45
45
|
version:
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
48
|
-
type: :
|
47
|
+
name: minitest
|
48
|
+
type: :development
|
49
|
+
version_requirement:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.3.1
|
55
|
+
version:
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: tap-test
|
58
|
+
type: :development
|
49
59
|
version_requirement:
|
50
60
|
version_requirements: !ruby/object:Gem::Requirement
|
51
61
|
requirements:
|
52
62
|
- - ">="
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
64
|
+
version: 0.1.0
|
55
65
|
version:
|
56
66
|
description:
|
57
67
|
email: simon.a.chiang@gmail.com
|
@@ -77,9 +87,11 @@ files:
|
|
77
87
|
- lib/ms/mascot/dat/section.rb
|
78
88
|
- lib/ms/mascot/dat/summary.rb
|
79
89
|
- lib/ms/mascot/dat/summary/id.rb
|
90
|
+
- lib/ms/mascot/dump/mgf.rb
|
80
91
|
- lib/ms/mascot/export.rb
|
81
|
-
- lib/ms/mascot/
|
92
|
+
- lib/ms/mascot/formats/xml.rb
|
82
93
|
- lib/ms/mascot/fragment.rb
|
94
|
+
- lib/ms/mascot/load/mgf.rb
|
83
95
|
- lib/ms/mascot/mgf.rb
|
84
96
|
- lib/ms/mascot/mgf/archive.rb
|
85
97
|
- lib/ms/mascot/mgf/entry.rb
|
data/lib/ms/mascot/format_mgf.rb
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
require 'ms/mascot/mgf/entry'
|
2
|
-
|
3
|
-
module Ms
|
4
|
-
module Mascot
|
5
|
-
# :startdoc::manifest formats an fragment spectrum as mgf
|
6
|
-
#
|
7
|
-
# Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
|
8
|
-
# configurations specify various details of the dump, including the
|
9
|
-
# precision and default headers.
|
10
|
-
#
|
11
|
-
# % tap run -- fragment TVQQEL --:s format_mgf
|
12
|
-
#
|
13
|
-
# (note: be sure to use the splat option on the join)
|
14
|
-
#
|
15
|
-
class FormatMgf < Tap::Task
|
16
|
-
|
17
|
-
config :default_headers, {}, &c.hash # A hash of default headers
|
18
|
-
config :mz_precision, 6, &c.integer # The precision of mzs
|
19
|
-
config :intensity_precision, 0, &c.integer # The precision of intensities
|
20
|
-
config :pepmass_precision, 6, &c.integer # The precision of peptide mass
|
21
|
-
|
22
|
-
config :prefix, nil, &c.string_or_nil # An optional prefix
|
23
|
-
config :suffix, "\n", &c.string_or_nil # An optional suffix
|
24
|
-
|
25
|
-
# Maps header keys (typically output by a fragment task)
|
26
|
-
# to Mgf::Entry header strings.
|
27
|
-
HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
|
28
|
-
|
29
|
-
def process(data, headers)
|
30
|
-
lines = []
|
31
|
-
lines << prefix if prefix
|
32
|
-
|
33
|
-
mgf_headers = format_headers(headers)
|
34
|
-
Ms::Mascot::Mgf::Entry.new(mgf_headers, data).dump(lines, config)
|
35
|
-
|
36
|
-
lines << suffix if suffix
|
37
|
-
lines.join("")
|
38
|
-
end
|
39
|
-
|
40
|
-
protected
|
41
|
-
|
42
|
-
# helper to format the headers properly for an mgf entry
|
43
|
-
def format_headers(headers) # :nodoc:
|
44
|
-
mgf_headers = {}
|
45
|
-
default_headers.merge(headers).each_pair do |key, value|
|
46
|
-
key = HEADER_MAP[key] || key.to_s.upcase
|
47
|
-
mgf_headers[key] = value
|
48
|
-
end
|
49
|
-
mgf_headers
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|