ms-mascot 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +8 -0
- data/lib/ms/mascot/dump/mgf.rb +64 -0
- data/lib/ms/mascot/export.rb +3 -3
- data/lib/ms/mascot/formats/xml.rb +66 -0
- data/lib/ms/mascot/fragment.rb +2 -2
- data/lib/ms/mascot/load/mgf.rb +79 -0
- data/lib/ms/mascot/mgf/entry.rb +8 -3
- data/lib/ms/mascot/submit.rb +19 -18
- metadata +23 -11
- data/lib/ms/mascot/format_mgf.rb +0 -53
data/History
CHANGED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'tap/tasks/dump'
|
2
|
+
require 'ms/mascot/mgf/entry'
|
3
|
+
|
4
|
+
module Ms
|
5
|
+
module Mascot
|
6
|
+
module Dump
|
7
|
+
# :startdoc::task dumps a fragment spectrum as mgf
|
8
|
+
#
|
9
|
+
# Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
|
10
|
+
# configurations specify various details of the dump, including the
|
11
|
+
# precision and default headers.
|
12
|
+
#
|
13
|
+
# % tap run -- fragment TVQQEL --: dump/mgf
|
14
|
+
#
|
15
|
+
# :startdoc::task-
|
16
|
+
#
|
17
|
+
# Examples:
|
18
|
+
#
|
19
|
+
# # reformat an mgf file (inefficient but works)
|
20
|
+
# % rap load/mgf --file OLD_FILE --:i dump/mgf --mz-precision 2 > NEW_FILE
|
21
|
+
#
|
22
|
+
class Mgf < Tap::Tasks::Dump
|
23
|
+
|
24
|
+
config :default_headers, {}, &c.hash # A hash of default headers
|
25
|
+
config :mz_precision, 6, &c.integer # The precision of mzs
|
26
|
+
config :intensity_precision, 0, &c.integer # The precision of intensities
|
27
|
+
config :pepmass_precision, 6, &c.integer # The precision of peptide mass
|
28
|
+
|
29
|
+
config :prefix, nil, &c.string_or_nil # An optional prefix
|
30
|
+
config :suffix, "\n", &c.string_or_nil # An optional suffix
|
31
|
+
|
32
|
+
# Maps common variations of header keys (typically output
|
33
|
+
# by a fragment task) to Mgf::Entry header strings.
|
34
|
+
HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
|
35
|
+
|
36
|
+
# Dumps the object to io as an mgf entry.
|
37
|
+
def dump(obj, io)
|
38
|
+
unless obj.kind_of?(Ms::Mascot::Mgf::Entry)
|
39
|
+
data, headers = obj
|
40
|
+
mgf_headers = format_headers(headers)
|
41
|
+
obj = Ms::Mascot::Mgf::Entry.new(mgf_headers, data)
|
42
|
+
end
|
43
|
+
|
44
|
+
io << prefix if prefix
|
45
|
+
obj.dump(io, config)
|
46
|
+
io << suffix if suffix
|
47
|
+
end
|
48
|
+
|
49
|
+
protected
|
50
|
+
|
51
|
+
# helper to format the headers properly for an mgf entry
|
52
|
+
def format_headers(headers) # :nodoc:
|
53
|
+
headers ||= {}
|
54
|
+
mgf_headers = {}
|
55
|
+
default_headers.merge(headers).each_pair do |key, value|
|
56
|
+
key = HEADER_MAP[key] || key.to_s.upcase
|
57
|
+
mgf_headers[key] = value
|
58
|
+
end
|
59
|
+
mgf_headers
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/ms/mascot/export.rb
CHANGED
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
|
|
3
3
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
|
-
# :startdoc::
|
6
|
+
# :startdoc::task exports results from a search
|
7
7
|
class Export < Tap::Mechanize::Submit
|
8
8
|
include Validation
|
9
9
|
|
@@ -15,7 +15,7 @@ module Ms
|
|
15
15
|
config "pep_expect", "1", &MASCOT_SWITCH
|
16
16
|
config "prot_mass", "1", &MASCOT_SWITCH
|
17
17
|
config "protein_master", "1", &MASCOT_SWITCH
|
18
|
-
config "_server_mudpit_switch", 0.000000001, &c.
|
18
|
+
config "_server_mudpit_switch", 0.000000001, &c.numeric
|
19
19
|
config "pep_exp_mz", "1", &MASCOT_SWITCH
|
20
20
|
config "do_export", "1", &MASCOT_SWITCH
|
21
21
|
config "pep_delta", "1", &MASCOT_SWITCH
|
@@ -37,7 +37,7 @@ module Ms
|
|
37
37
|
config "_showallfromerrortolerant", ""
|
38
38
|
config "prot_hit_num", "1", &MASCOT_SWITCH
|
39
39
|
config "search_master", "1", &MASCOT_SWITCH
|
40
|
-
config "_sigthreshold", 0.05, &c.
|
40
|
+
config "_sigthreshold", 0.05, &c.numeric
|
41
41
|
config "show_params", "1", &MASCOT_SWITCH
|
42
42
|
config "show_mods", "1", &MASCOT_SWITCH
|
43
43
|
config "show_header", "1", &MASCOT_SWITCH
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Formats
|
6
|
+
class Xml
|
7
|
+
module Utils
|
8
|
+
def hashify(nodes, cast=true)
|
9
|
+
nodes.inject({}) do |hash, node|
|
10
|
+
value = node.content
|
11
|
+
hash[node.name] = cast ? objectify(value) : value
|
12
|
+
hash
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def objectify(value)
|
17
|
+
case value
|
18
|
+
when nil
|
19
|
+
nil
|
20
|
+
when /\A\d+(\.\d+)?\z/
|
21
|
+
$1 ? value.to_f : value.to_i
|
22
|
+
else
|
23
|
+
value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_reader :doc
|
29
|
+
|
30
|
+
def initialize(xml)
|
31
|
+
xml = xml.read if xml.respond_to?(:read)
|
32
|
+
xml = xml.sub(%q{xmlns="http://www.matrixscience.com/xmlns/schema/mascot_search_results_2"}, "")
|
33
|
+
@doc = Nokogiri::XML(xml)
|
34
|
+
end
|
35
|
+
|
36
|
+
def header
|
37
|
+
doc.at("/mascot_search_results/header")
|
38
|
+
end
|
39
|
+
|
40
|
+
def modifications
|
41
|
+
doc.xpath("/mascot_search_results/variable_mods/modification")
|
42
|
+
end
|
43
|
+
|
44
|
+
def search_parameters
|
45
|
+
doc.xpath("/mascot_search_results/search_parameters")
|
46
|
+
end
|
47
|
+
|
48
|
+
def format_parameters
|
49
|
+
doc.xpath("/mascot_search_results/format_parameters")
|
50
|
+
end
|
51
|
+
|
52
|
+
def hits
|
53
|
+
doc.xpath("/mascot_search_results/hits/hit")
|
54
|
+
end
|
55
|
+
|
56
|
+
def proteins(hit)
|
57
|
+
hit.xpath("protein")
|
58
|
+
end
|
59
|
+
|
60
|
+
def peptides(protein)
|
61
|
+
protein.xpath("peptide")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/ms/mascot/fragment.rb
CHANGED
@@ -4,7 +4,7 @@ require 'ms/mascot/spectrum'
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
6
|
|
7
|
-
# :startdoc::
|
7
|
+
# :startdoc::task calculates a theoretical Mascot ms/ms spectrum
|
8
8
|
#
|
9
9
|
# Calculates the theoretical Mascot ms/ms spectrum for a peptide sequence.
|
10
10
|
# A Mascot spectrum differs from the standard in-silico spectrum only in
|
@@ -23,7 +23,7 @@ module Ms
|
|
23
23
|
# See Ms::Mascot::Spectrum for more details.
|
24
24
|
class Fragment < InSilico::Fragment
|
25
25
|
|
26
|
-
config :intensity, 1, &c.
|
26
|
+
config :intensity, 1, &c.numeric_or_nil # a uniform intensity value
|
27
27
|
|
28
28
|
# Generates some MGF-specific headers.
|
29
29
|
def headers(spec)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'tap/tasks/load'
|
2
|
+
require 'ms/mascot/mgf/archive'
|
3
|
+
|
4
|
+
module Ms
|
5
|
+
module Mascot
|
6
|
+
module Load
|
7
|
+
|
8
|
+
# :startdoc::task loads entries from an mgf file
|
9
|
+
#
|
10
|
+
# Load entries from an mgf file. A selector may be specified to select
|
11
|
+
# only a subset of entries; by default all entries in the mgf file are
|
12
|
+
# returned.
|
13
|
+
#
|
14
|
+
# % tap run -- load/mgf --select 1 < MGF_FILE
|
15
|
+
# % tap run -- load/mgf --select 1..10 < MGF_FILE
|
16
|
+
#
|
17
|
+
# Entries are always returned as an array, even when the selection is
|
18
|
+
# for a single entry.
|
19
|
+
#
|
20
|
+
class Mgf < Tap::Tasks::Load
|
21
|
+
Archive = Ms::Mascot::Mgf::Archive
|
22
|
+
|
23
|
+
config :select, nil do |input| # An array selector for entries
|
24
|
+
if input.kind_of?(String)
|
25
|
+
input = "!ruby/range #{input}" if input =~ /\.{2,3}/
|
26
|
+
input = YAML.load(input)
|
27
|
+
end
|
28
|
+
c.validate(input, [nil, Integer, Range, Array])
|
29
|
+
end
|
30
|
+
|
31
|
+
nest :filter do
|
32
|
+
config :title, nil, &c.regexp_or_nil
|
33
|
+
config :charge, nil, &c.range_or_nil
|
34
|
+
config :pepmass, nil, &c.range_or_nil
|
35
|
+
config :n_ions, nil, &c.range_or_nil
|
36
|
+
|
37
|
+
def ok?(entry)
|
38
|
+
(!title || entry.title =~ title) &&
|
39
|
+
(!charge || charge.include?(entry.charge)) &&
|
40
|
+
(!pepmass || pepmass.include?(entry.pepmass)) &&
|
41
|
+
(!n_ions || n_ions.include?(entry.data.length))
|
42
|
+
end
|
43
|
+
|
44
|
+
def filter!(entries)
|
45
|
+
return entries unless title || charge || pepmass || n_ions
|
46
|
+
entries.select {|entry| ok?(entry) }
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def open_io(input)
|
51
|
+
if input.kind_of?(String)
|
52
|
+
Archive.open(input) {|io| yield(io) }
|
53
|
+
else
|
54
|
+
super(input) do |io|
|
55
|
+
arc = Archive.new(io)
|
56
|
+
result = yield(arc)
|
57
|
+
arc.close
|
58
|
+
result
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def load(arc)
|
64
|
+
case select
|
65
|
+
when Integer
|
66
|
+
entry = arc[select]
|
67
|
+
filter.ok?(entry) ? [entry] : []
|
68
|
+
when Range
|
69
|
+
filter.filter!(arc[select])
|
70
|
+
when Array
|
71
|
+
filter.filter!(arc[*select])
|
72
|
+
when nil
|
73
|
+
arc.select {|entry| filter.ok?(entry) }
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/lib/ms/mascot/mgf/entry.rb
CHANGED
@@ -136,9 +136,14 @@ module Ms
|
|
136
136
|
(options[:headers] || headers.keys).each do |key|
|
137
137
|
target << "#{key.upcase}=#{headers[key]}\n"
|
138
138
|
end
|
139
|
-
|
140
|
-
|
141
|
-
|
139
|
+
|
140
|
+
if charge
|
141
|
+
target << "CHARGE=#{charge_to_s}\n"
|
142
|
+
end
|
143
|
+
|
144
|
+
if pepmass
|
145
|
+
target << "PEPMASS=#{format options[:pepmass_precision]}\n" % pepmass
|
146
|
+
end
|
142
147
|
|
143
148
|
entry = data[0]
|
144
149
|
data_format = case
|
data/lib/ms/mascot/submit.rb
CHANGED
@@ -3,7 +3,7 @@ require 'ms/mascot/validation'
|
|
3
3
|
|
4
4
|
module Ms
|
5
5
|
module Mascot
|
6
|
-
# :startdoc::
|
6
|
+
# :startdoc::task submits a PMF or MS/MS search to Mascot
|
7
7
|
#
|
8
8
|
# Submits a search request to Mascot using the mgf file and the search
|
9
9
|
# parameters in a static config file. Correctly formatting search
|
@@ -46,37 +46,38 @@ module Ms
|
|
46
46
|
config :uri, "http://www.matrixscience.com/cgi/nph-mascot.exe?1" # The uri of the mascot search site
|
47
47
|
|
48
48
|
# Parameters for MS/MS searching of a human sample digested with trypsin
|
49
|
-
nest :params do
|
50
|
-
config "
|
51
|
-
config "
|
52
|
-
config "INSTRUMENT", "Default", &c.string
|
53
|
-
config "REPTYPE", "peptide", &c.string
|
49
|
+
nest :params do # The query parameters
|
50
|
+
config "USERNAME", "Name", &c.string
|
51
|
+
config "USEREMAIL", '', &c.string
|
54
52
|
config "COM", "Search Title", &c.string
|
53
|
+
config "INSTRUMENT", "Default", &c.string
|
55
54
|
config "FORMAT", "Mascot generic", &c.string
|
56
|
-
config "PEAK", "AUTO", &c.string
|
57
55
|
config "CHARGE", "+2"
|
58
|
-
config "INTERMEDIATE", "", &c.string
|
59
|
-
config "SHOWALLMODS", "", &c.string
|
60
|
-
config "PRECURSOR", "", &c.string
|
61
|
-
config "USERNAME", "Name", &c.string
|
62
56
|
config "TOLU", "ppm", &c.string
|
63
|
-
config "USEREMAIL", '', &c.string
|
64
57
|
config "CLE", "Trypsin", &c.string
|
65
|
-
config "TOL", 100, &c.
|
58
|
+
config "TOL", 100, &c.numeric
|
66
59
|
config "ITOLU", "Da", &c.string
|
67
|
-
config "
|
68
|
-
config "SEARCH", "MIS", &c.string
|
60
|
+
config "PFA", 1, &MASCOT_SWITCH
|
69
61
|
config "DB", "SwissProt", &c.string
|
70
|
-
config "PEP_ISOTOPE_ERROR", 0, &c.num
|
71
62
|
config "ITOL", 0.6, &c.float
|
72
|
-
config "FORMVER", 1.01, &c.float
|
73
63
|
config "IT_MODS", [
|
74
64
|
"Acetyl (Protein N-term)",
|
75
65
|
"Gln->pyro-Glu (N-term Q)",
|
76
|
-
"Oxidation (M)"
|
66
|
+
"Oxidation (M)"
|
67
|
+
], &c.list
|
77
68
|
config "MASS", "Monoisotopic", &c.string
|
78
69
|
config "REPORT", "AUTO", &c.string
|
79
70
|
config "TAXONOMY", ". . . . . . . . . . . . . . . . Homo sapiens (human)", &c.string
|
71
|
+
config "INTERMEDIATE", "", &c.string
|
72
|
+
config "PRECURSOR", "", &c.string
|
73
|
+
config "QUANTITATION", "None", &c.string
|
74
|
+
config "PEP_ISOTOPE_ERROR", 0, &c.numeric
|
75
|
+
config "SEARCH", "MIS", :type => :hidden
|
76
|
+
config "PEAK", "AUTO", :type => :hidden
|
77
|
+
config "SHOWALLMODS", "", :type => :hidden
|
78
|
+
config "ErrTolRepeat", 0, :type => :hidden
|
79
|
+
config "REPTYPE", "peptide", :type => :hidden
|
80
|
+
config "FORMVER", 1.01, :type => :hidden
|
80
81
|
end
|
81
82
|
|
82
83
|
def process(mgf_file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-mascot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Chiang
|
@@ -10,31 +10,31 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-05-25 00:00:00 -06:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name: tap
|
17
|
+
name: tap-mechanize
|
18
18
|
type: :runtime
|
19
19
|
version_requirement:
|
20
20
|
version_requirements: !ruby/object:Gem::Requirement
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 0.
|
24
|
+
version: 0.6.0
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name:
|
27
|
+
name: external
|
28
28
|
type: :runtime
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 0.
|
34
|
+
version: 0.3.0
|
35
35
|
version:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
37
|
+
name: ms-in_silico
|
38
38
|
type: :runtime
|
39
39
|
version_requirement:
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,14 +44,24 @@ dependencies:
|
|
44
44
|
version: 0.3.0
|
45
45
|
version:
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
48
|
-
type: :
|
47
|
+
name: minitest
|
48
|
+
type: :development
|
49
|
+
version_requirement:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.3.1
|
55
|
+
version:
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: tap-test
|
58
|
+
type: :development
|
49
59
|
version_requirement:
|
50
60
|
version_requirements: !ruby/object:Gem::Requirement
|
51
61
|
requirements:
|
52
62
|
- - ">="
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
64
|
+
version: 0.1.0
|
55
65
|
version:
|
56
66
|
description:
|
57
67
|
email: simon.a.chiang@gmail.com
|
@@ -77,9 +87,11 @@ files:
|
|
77
87
|
- lib/ms/mascot/dat/section.rb
|
78
88
|
- lib/ms/mascot/dat/summary.rb
|
79
89
|
- lib/ms/mascot/dat/summary/id.rb
|
90
|
+
- lib/ms/mascot/dump/mgf.rb
|
80
91
|
- lib/ms/mascot/export.rb
|
81
|
-
- lib/ms/mascot/
|
92
|
+
- lib/ms/mascot/formats/xml.rb
|
82
93
|
- lib/ms/mascot/fragment.rb
|
94
|
+
- lib/ms/mascot/load/mgf.rb
|
83
95
|
- lib/ms/mascot/mgf.rb
|
84
96
|
- lib/ms/mascot/mgf/archive.rb
|
85
97
|
- lib/ms/mascot/mgf/entry.rb
|
data/lib/ms/mascot/format_mgf.rb
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
require 'ms/mascot/mgf/entry'
|
2
|
-
|
3
|
-
module Ms
|
4
|
-
module Mascot
|
5
|
-
# :startdoc::manifest formats a fragment spectrum as mgf
|
6
|
-
#
|
7
|
-
# Formats the data produced by an Ms::Mascot::Fragment task as an mgf. The
|
8
|
-
# configurations specify various details of the dump, including the
|
9
|
-
# precision and default headers.
|
10
|
-
#
|
11
|
-
# % tap run -- fragment TVQQEL --:s format_mgf
|
12
|
-
#
|
13
|
-
# (note: be sure to use the splat option on the join)
|
14
|
-
#
|
15
|
-
class FormatMgf < Tap::Task
|
16
|
-
|
17
|
-
config :default_headers, {}, &c.hash # A hash of default headers
|
18
|
-
config :mz_precision, 6, &c.integer # The precision of mzs
|
19
|
-
config :intensity_precision, 0, &c.integer # The precision of intensities
|
20
|
-
config :pepmass_precision, 6, &c.integer # The precision of peptide mass
|
21
|
-
|
22
|
-
config :prefix, nil, &c.string_or_nil # An optional prefix
|
23
|
-
config :suffix, "\n", &c.string_or_nil # An optional suffix
|
24
|
-
|
25
|
-
# Maps header keys (typically output by a fragment task)
|
26
|
-
# to Mgf::Entry header strings.
|
27
|
-
HEADER_MAP = {:parent_ion_mass => 'PEPMASS'}
|
28
|
-
|
29
|
-
def process(data, headers)
|
30
|
-
lines = []
|
31
|
-
lines << prefix if prefix
|
32
|
-
|
33
|
-
mgf_headers = format_headers(headers)
|
34
|
-
Ms::Mascot::Mgf::Entry.new(mgf_headers, data).dump(lines, config)
|
35
|
-
|
36
|
-
lines << suffix if suffix
|
37
|
-
lines.join("")
|
38
|
-
end
|
39
|
-
|
40
|
-
protected
|
41
|
-
|
42
|
-
# helper to format the headers properly for an mgf entry
|
43
|
-
def format_headers(headers) # :nodoc:
|
44
|
-
mgf_headers = {}
|
45
|
-
default_headers.merge(headers).each_pair do |key, value|
|
46
|
-
key = HEADER_MAP[key] || key.to_s.upcase
|
47
|
-
mgf_headers[key] = value
|
48
|
-
end
|
49
|
-
mgf_headers
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|