ms-mascot 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History +11 -1
- data/lib/ms/mascot.rb +4 -1
- data/lib/ms/mascot/dat/archive.rb +85 -8
- data/lib/ms/mascot/dat/header.rb +12 -0
- data/lib/ms/mascot/dat/index.rb +13 -19
- data/lib/ms/mascot/dat/masses.rb +14 -0
- data/lib/ms/mascot/dat/parameters.rb +14 -0
- data/lib/ms/mascot/dat/peptides.rb +211 -2
- data/lib/ms/mascot/dat/proteins.rb +73 -1
- data/lib/ms/mascot/dat/query.rb +141 -6
- data/lib/ms/mascot/dat/section.rb +9 -5
- data/lib/ms/mascot/dat/summary.rb +223 -5
- data/lib/ms/mascot/export.rb +55 -72
- data/lib/ms/mascot/format_mgf.rb +7 -8
- data/lib/ms/mascot/fragment.rb +1 -1
- data/lib/ms/mascot/mgf.rb +32 -2
- data/lib/ms/mascot/mgf/archive.rb +8 -1
- data/lib/ms/mascot/submit.rb +52 -69
- data/lib/ms/mascot/validation.rb +17 -0
- metadata +7 -6
data/lib/ms/mascot/export.rb
CHANGED
@@ -1,84 +1,67 @@
|
|
1
|
-
require 'tap/
|
1
|
+
require 'tap/mechanize/submit'
|
2
|
+
require 'ms/mascot/validation'
|
2
3
|
|
3
4
|
module Ms
|
4
5
|
module Mascot
|
5
6
|
# :startdoc::manifest exports results from a search
|
6
|
-
class Export < Tap::
|
7
|
-
|
7
|
+
class Export < Tap::Mechanize::Submit
|
8
|
+
include Validation
|
9
|
+
|
8
10
|
# The MatrixScience public search site
|
9
|
-
|
11
|
+
config :uri, "http://www.matrixscience.com/cgi/export_dat_2.pl"
|
10
12
|
|
11
13
|
# Parameters for a typical export
|
12
|
-
|
13
|
-
"pep_expect"
|
14
|
-
"prot_mass"
|
15
|
-
"protein_master"
|
16
|
-
"_server_mudpit_switch"
|
17
|
-
"pep_exp_mz"
|
18
|
-
"do_export"
|
19
|
-
"pep_delta"
|
20
|
-
"export_format"
|
21
|
-
"prot_acc"
|
22
|
-
"pep_score"
|
23
|
-
"show_format"
|
24
|
-
"_showsubsets"
|
25
|
-
"_show_decoy_report"
|
26
|
-
"pep_scan_title"
|
27
|
-
"pep_miss"
|
28
|
-
"pep_calc_mr"
|
29
|
-
"pep_exp_mr"
|
30
|
-
"prot_score"
|
31
|
-
"pep_query"
|
32
|
-
"peptide_master"
|
33
|
-
"prot_matches"
|
34
|
-
"_onlyerrortolerant"
|
35
|
-
"_showallfromerrortolerant"
|
36
|
-
"prot_hit_num"
|
37
|
-
"search_master"
|
38
|
-
"_sigthreshold"
|
39
|
-
"show_params"
|
40
|
-
"show_mods"
|
41
|
-
"show_header"
|
42
|
-
"pep_isbold"
|
43
|
-
"pep_seq"
|
44
|
-
"pep_exp_z"
|
45
|
-
"prot_desc"
|
46
|
-
"_ignoreionsscorebelow"
|
47
|
-
"REPORT"
|
48
|
-
"pep_rank"
|
49
|
-
"pep_var_mod"
|
50
|
-
"_noerrortolerant"
|
51
|
-
|
52
|
-
|
53
|
-
# Typical headers for an export
|
54
|
-
DEFAULT_HEADERS = {
|
55
|
-
"Keep-Alive"=>"300",
|
56
|
-
"Accept-Encoding"=>"gzip,deflate",
|
57
|
-
"Accept-Language"=>"en-us,en;q=0.5",
|
58
|
-
"Content-Type"=> "multipart/form-data; boundary=---------------------------168072824752491622650073",
|
59
|
-
"Accept-Charset"=>"ISO-8859-1,utf-8;q=0.7,*;q=0.7",
|
60
|
-
"Accept"=>"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
61
|
-
"Connection"=>"keep-alive"
|
62
|
-
}
|
63
|
-
|
64
|
-
config :uri, DEFAULT_URI # The uri of the mascot search site
|
65
|
-
config :headers, DEFAULT_HEADERS, &c.hash # a hash of request headers
|
66
|
-
config :params, DEFAULT_PARAMS, &c.hash # a hash of query parameters
|
67
|
-
config :request_method, 'GET' # the request method (get or post)
|
68
|
-
config :version, 1.1 # the HTTP version
|
69
|
-
config :redirection_limit, nil, &c.integer_or_nil # the redirection limit for the request
|
14
|
+
nest :params do
|
15
|
+
config "pep_expect", "1", &MASCOT_SWITCH
|
16
|
+
config "prot_mass", "1", &MASCOT_SWITCH
|
17
|
+
config "protein_master", "1", &MASCOT_SWITCH
|
18
|
+
config "_server_mudpit_switch", 0.000000001, &c.num
|
19
|
+
config "pep_exp_mz", "1", &MASCOT_SWITCH
|
20
|
+
config "do_export", "1", &MASCOT_SWITCH
|
21
|
+
config "pep_delta", "1", &MASCOT_SWITCH
|
22
|
+
config "export_format", "XML", &c.string
|
23
|
+
config "prot_acc", "1", &MASCOT_SWITCH
|
24
|
+
config "pep_score", "1", &MASCOT_SWITCH
|
25
|
+
config "show_format", "1", &MASCOT_SWITCH
|
26
|
+
config "_showsubsets", "0", &MASCOT_SWITCH
|
27
|
+
config "_show_decoy_report", ""
|
28
|
+
config "pep_scan_title", "1", &MASCOT_SWITCH
|
29
|
+
config "pep_miss", "1", &MASCOT_SWITCH
|
30
|
+
config "pep_calc_mr", "1", &MASCOT_SWITCH
|
31
|
+
config "pep_exp_mr", "1", &MASCOT_SWITCH
|
32
|
+
config "prot_score", "1", &MASCOT_SWITCH
|
33
|
+
config "pep_query", "1", &MASCOT_SWITCH
|
34
|
+
config "peptide_master", "1", &MASCOT_SWITCH
|
35
|
+
config "prot_matches", "1", &MASCOT_SWITCH
|
36
|
+
config "_onlyerrortolerant", ""
|
37
|
+
config "_showallfromerrortolerant", ""
|
38
|
+
config "prot_hit_num", "1", &MASCOT_SWITCH
|
39
|
+
config "search_master", "1", &MASCOT_SWITCH
|
40
|
+
config "_sigthreshold", 0.05, &c.num
|
41
|
+
config "show_params", "1", &MASCOT_SWITCH
|
42
|
+
config "show_mods", "1", &MASCOT_SWITCH
|
43
|
+
config "show_header", "1", &MASCOT_SWITCH
|
44
|
+
config "pep_isbold", "1", &MASCOT_SWITCH
|
45
|
+
config "pep_seq", "1", &MASCOT_SWITCH
|
46
|
+
config "pep_exp_z", "1", &MASCOT_SWITCH
|
47
|
+
config "prot_desc", "1", &MASCOT_SWITCH
|
48
|
+
config "_ignoreionsscorebelow", "0", &MASCOT_SWITCH
|
49
|
+
config "REPORT", "AUTO", &c.string
|
50
|
+
config "pep_rank", "1", &MASCOT_SWITCH
|
51
|
+
config "pep_var_mod", "1", &MASCOT_SWITCH
|
52
|
+
config "_noerrortolerant", ""
|
53
|
+
end
|
70
54
|
|
71
55
|
def process(result_filepath)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
super(request)
|
56
|
+
params = config[:params].to_hash
|
57
|
+
params['file'] = result_filepath
|
58
|
+
|
59
|
+
# submit request
|
60
|
+
super(
|
61
|
+
:request_method => 'GET',
|
62
|
+
:uri => uri,
|
63
|
+
:params => params
|
64
|
+
)
|
82
65
|
end
|
83
66
|
end
|
84
67
|
end
|
data/lib/ms/mascot/format_mgf.rb
CHANGED
@@ -8,20 +8,19 @@ module Ms
|
|
8
8
|
# configurations specify various details of the dump, including the
|
9
9
|
# precision and default headers.
|
10
10
|
#
|
11
|
-
# %
|
11
|
+
# % tap run -- fragment TVQQEL --:s format_mgf
|
12
12
|
#
|
13
13
|
# (note: be sure to use the splat option on the join)
|
14
14
|
#
|
15
15
|
class FormatMgf < Tap::Task
|
16
16
|
|
17
|
-
config :default_headers, {}, &c.hash #
|
18
|
-
config :
|
19
|
-
config :
|
20
|
-
config :
|
21
|
-
config :pepmass_precision, 6, &c.integer # the precision of peptide mass
|
17
|
+
config :default_headers, {}, &c.hash # A hash of default headers
|
18
|
+
config :mz_precision, 6, &c.integer # The precision of mzs
|
19
|
+
config :intensity_precision, 0, &c.integer # The precision of intensities
|
20
|
+
config :pepmass_precision, 6, &c.integer # The precision of peptide mass
|
22
21
|
|
23
|
-
config :prefix, nil, &c.string_or_nil #
|
24
|
-
config :suffix, "\n", &c.string_or_nil #
|
22
|
+
config :prefix, nil, &c.string_or_nil # An optional prefix
|
23
|
+
config :suffix, "\n", &c.string_or_nil # An optional suffix
|
25
24
|
|
26
25
|
# Maps header keys (typically output by a fragment task)
|
27
26
|
# to Mgf::Entry header strings.
|
data/lib/ms/mascot/fragment.rb
CHANGED
data/lib/ms/mascot/mgf.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
require 'ms/mascot/mgf/entry'
|
2
2
|
require 'ms/mascot/mgf/archive'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
module Ms
|
5
6
|
module Mascot
|
6
7
|
module Mgf
|
8
|
+
# see http://www.matrixscience.com/help/data_file_help.html
|
9
|
+
VALID_LOCAL_HEADERS = Set.new(%w(CHARGE COMP ETAG INSTRUMENT IT_MODS PEPMASS RTINSECONDS SCANS SEQ TAG TITLE TOL TOLU))
|
10
|
+
VALID_GLOBAL_HEADERS = Set.new(%w(ACCESSION CHARGE CLE COM CUTOUT DB DECOY ERRORTOLERANT FORMAT FRAMES INSTRUMENT IT_MODS ITOL ITOLU MASS MODS PEP_ISOTOPE_ERROR PFA PRECURSOR QUANTITATION REPORT REPTYPE SEARCH SEG TAXONOMY TOL TOLU USER00 USER01 USER02 USER03 USER04 USER05 USER06 USER07 USER08 USER09 USER10 USER11 USER12 USEREMAIL USERNAME))
|
11
|
+
|
7
12
|
class << self
|
8
13
|
# Opens the file and yields an array of entries (well, the array is
|
9
14
|
# actually an Ms::Mascot::Mgf::Archive object that acts like an array
|
@@ -26,10 +31,35 @@ module Ms
|
|
26
31
|
# returns each entry in the mgf file, like IO.foreach
|
27
32
|
def foreach(file, &block)
|
28
33
|
open(file) do |ar|
|
29
|
-
ar.each &block
|
34
|
+
ar.each( &block )
|
30
35
|
end
|
31
36
|
end
|
32
|
-
|
37
|
+
|
38
|
+
# yields an Ms::Mascot::Mgf::Archive object and writes the data to
|
39
|
+
# outfile.
|
40
|
+
#
|
41
|
+
# example of writing spectra to "out.mgf":
|
42
|
+
#
|
43
|
+
# Ms::Mascot::Mgf.write("out.mgf") do |mgf|
|
44
|
+
# # use the Query#to_mgf method:
|
45
|
+
# mgf << query.to_mgf(peptide_hit)
|
46
|
+
#
|
47
|
+
# # create your own entry object
|
48
|
+
# mgf << Ms::Mascot::Mgf::Entry.new(header, data)
|
49
|
+
#
|
50
|
+
# # push on the strings
|
51
|
+
# mgf << "BEGIN IONS"
|
52
|
+
# mgf << "TITLE=mytitle"
|
53
|
+
# # ... the rest of the info ...
|
54
|
+
# mgf << "END IONS"
|
55
|
+
# end
|
56
|
+
def write(outfile)
|
57
|
+
mgf = Archive.new
|
58
|
+
yield mgf
|
59
|
+
mgf.close(outfile, nil, true)
|
60
|
+
end
|
61
|
+
|
62
|
+
end # end module methods
|
33
63
|
end
|
34
64
|
end
|
35
65
|
end
|
@@ -8,6 +8,13 @@ module Ms
|
|
8
8
|
# Provides array-like access to an mgf archival file.
|
9
9
|
class Archive < ExternalArchive
|
10
10
|
|
11
|
+
# yields an object for writing
|
12
|
+
def self.write(filename)
|
13
|
+
mgf = self.new
|
14
|
+
File.open(filename, 'w') do |out|
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
11
18
|
# Reindexes self to each mgf entry in io
|
12
19
|
def reindex(&block)
|
13
20
|
reindex_by_sep("BEGIN IONS", :entry_follows_sep => true, &block)
|
@@ -21,4 +28,4 @@ module Ms
|
|
21
28
|
end
|
22
29
|
end
|
23
30
|
end
|
24
|
-
end
|
31
|
+
end
|
data/lib/ms/mascot/submit.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
require 'tap/
|
1
|
+
require 'tap/mechanize/request'
|
2
|
+
require 'ms/mascot/validation'
|
2
3
|
|
3
4
|
module Ms
|
4
5
|
module Mascot
|
@@ -28,55 +29,8 @@ module Ms
|
|
28
29
|
# values MUST be overridden and are only provided as a template (for
|
29
30
|
# those that want the adventure of manually making a config file).
|
30
31
|
#
|
31
|
-
class Submit < Tap::
|
32
|
-
|
33
|
-
# The MatrixScience public search site
|
34
|
-
DEFAULT_URI = "http://www.matrixscience.com/cgi/nph-mascot.exe?1"
|
35
|
-
|
36
|
-
# Parameters for MS/MS searching of a human sample digested with trypsin.
|
37
|
-
DEFAULT_PARAMS = {
|
38
|
-
"ErrTolRepeat"=>"0",
|
39
|
-
"PFA"=>"1",
|
40
|
-
"INSTRUMENT"=>"Default",
|
41
|
-
"REPTYPE"=>"peptide",
|
42
|
-
"COM"=>"Search Title",
|
43
|
-
"FORMAT"=>"Mascot generic",
|
44
|
-
"PEAK"=>"AUTO",
|
45
|
-
"CHARGE"=>"2+",
|
46
|
-
"INTERMEDIATE"=>"",
|
47
|
-
"SHOWALLMODS"=>"",
|
48
|
-
"PRECURSOR"=>"",
|
49
|
-
"USERNAME"=>"Name",
|
50
|
-
"TOLU"=>"ppm",
|
51
|
-
"USEREMAIL"=>"email@email.com",
|
52
|
-
"CLE"=>"Trypsin",
|
53
|
-
"TOL"=>"100",
|
54
|
-
"ITOLU"=>"Da",
|
55
|
-
"QUANTITATION"=>"None",
|
56
|
-
"SEARCH"=>"MIS",
|
57
|
-
"DB"=>"SwissProt",
|
58
|
-
"PEP_ISOTOPE_ERROR"=>"0",
|
59
|
-
"ITOL"=>"0.6",
|
60
|
-
"FORMVER"=>"1.01",
|
61
|
-
"IT_MODS"=> [
|
62
|
-
"Acetyl (Protein N-term)",
|
63
|
-
"Gln->pyro-Glu (N-term Q)",
|
64
|
-
"Oxidation (M)"],
|
65
|
-
"MASS"=>"Monoisotopic",
|
66
|
-
"REPORT"=>"AUTO",
|
67
|
-
"TAXONOMY"=>". . . . . . . . . . . . . . . . Homo sapiens (human)"
|
68
|
-
}
|
69
|
-
|
70
|
-
# Typical headers for an MS/MS search.
|
71
|
-
DEFAULT_HEADERS = {
|
72
|
-
"Keep-Alive"=>"300",
|
73
|
-
"Accept-Encoding"=>"gzip,deflate",
|
74
|
-
"Accept-Language"=>"en-us,en;q=0.5",
|
75
|
-
"Content-Type"=> "multipart/form-data; boundary=---------------------------168072824752491622650073",
|
76
|
-
"Accept-Charset"=>"ISO-8859-1,utf-8;q=0.7,*;q=0.7",
|
77
|
-
"Accept"=>"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
78
|
-
"Connection"=>"keep-alive"
|
79
|
-
}
|
32
|
+
class Submit < Tap::Mechanize::Request
|
33
|
+
include Validation
|
80
34
|
|
81
35
|
# Matches a successful search response. After the match:
|
82
36
|
#
|
@@ -88,29 +42,58 @@ module Ms
|
|
88
42
|
# $1:: the failure message
|
89
43
|
FAILURE_REGEXP = /<BR>(.*)/m
|
90
44
|
|
91
|
-
|
92
|
-
config :
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
45
|
+
# The MatrixScience public search site
|
46
|
+
config :uri, "http://www.matrixscience.com/cgi/nph-mascot.exe?1" # The uri of the mascot search site
|
47
|
+
|
48
|
+
# Parameters for MS/MS searching of a human sample digested with trypsin
|
49
|
+
nest :params do # The query parameters
|
50
|
+
config "ErrTolRepeat", 0, &MASCOT_SWITCH
|
51
|
+
config "PFA", 1, &MASCOT_SWITCH
|
52
|
+
config "INSTRUMENT", "Default", &c.string
|
53
|
+
config "REPTYPE", "peptide", &c.string
|
54
|
+
config "COM", "Search Title", &c.string
|
55
|
+
config "FORMAT", "Mascot generic", &c.string
|
56
|
+
config "PEAK", "AUTO", &c.string
|
57
|
+
config "CHARGE", "+2"
|
58
|
+
config "INTERMEDIATE", "", &c.string
|
59
|
+
config "SHOWALLMODS", "", &c.string
|
60
|
+
config "PRECURSOR", "", &c.string
|
61
|
+
config "USERNAME", "Name", &c.string
|
62
|
+
config "TOLU", "ppm", &c.string
|
63
|
+
config "USEREMAIL", '', &c.string
|
64
|
+
config "CLE", "Trypsin", &c.string
|
65
|
+
config "TOL", 100, &c.num
|
66
|
+
config "ITOLU", "Da", &c.string
|
67
|
+
config "QUANTITATION", "None", &c.string
|
68
|
+
config "SEARCH", "MIS", &c.string
|
69
|
+
config "DB", "SwissProt", &c.string
|
70
|
+
config "PEP_ISOTOPE_ERROR", 0, &c.num
|
71
|
+
config "ITOL", 0.6, &c.float
|
72
|
+
config "FORMVER", 1.01, &c.float
|
73
|
+
config "IT_MODS", [
|
74
|
+
"Acetyl (Protein N-term)",
|
75
|
+
"Gln->pyro-Glu (N-term Q)",
|
76
|
+
"Oxidation (M)"], &c.list
|
77
|
+
config "MASS", "Monoisotopic", &c.string
|
78
|
+
config "REPORT", "AUTO", &c.string
|
79
|
+
config "TAXONOMY", ". . . . . . . . . . . . . . . . Homo sapiens (human)", &c.string
|
80
|
+
end
|
97
81
|
|
98
82
|
def process(mgf_file)
|
83
|
+
File.open(mgf_file) do |io|
|
84
|
+
# set filename for upload
|
85
|
+
params = config[:params].to_hash
|
86
|
+
params['FILE'] = io
|
99
87
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
88
|
+
# submit request
|
89
|
+
page = super(
|
90
|
+
:request_method => 'POST',
|
91
|
+
:uri => uri,
|
92
|
+
:params => params
|
93
|
+
)
|
94
|
+
|
95
|
+
parse_response_body(page.body)
|
104
96
|
end
|
105
|
-
|
106
|
-
# set filename for upload
|
107
|
-
file = request[:params]['FILE'] ||= {}
|
108
|
-
file['Filename'] = mgf_file
|
109
|
-
file['Content-Type'] = 'application/octet-stream'
|
110
|
-
file.delete('Content')
|
111
|
-
|
112
|
-
# submit request
|
113
|
-
parse_response_body super(request)
|
114
97
|
end
|
115
98
|
|
116
99
|
# Processes the response body. Returns the result file if the body
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ms
|
2
|
+
module Mascot
|
3
|
+
module Validation
|
4
|
+
MASCOT_SWITCH = lambda do |input|
|
5
|
+
input = case input
|
6
|
+
when true, 1, '1', /true/i then '1'
|
7
|
+
when false, 0, '0', /false/i then '0'
|
8
|
+
else input
|
9
|
+
end
|
10
|
+
|
11
|
+
Configurable::Validation::validate(input, ['1', '0'])
|
12
|
+
end
|
13
|
+
|
14
|
+
Configurable::DEFAULT_ATTRIBUTES[MASCOT_SWITCH] = {:type => :switch}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-mascot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Chiang
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-03-31 00:00:00 -06:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -21,17 +21,17 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - ">="
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
24
|
+
version: 0.12.4
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name: tap-
|
27
|
+
name: tap-mechanize
|
28
28
|
type: :runtime
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 0.
|
34
|
+
version: 0.5.1
|
35
35
|
version:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: external
|
@@ -51,7 +51,7 @@ dependencies:
|
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.2.
|
54
|
+
version: 0.2.3
|
55
55
|
version:
|
56
56
|
description:
|
57
57
|
email: simon.a.chiang@gmail.com
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/ms/mascot/mgf/entry.rb
|
86
86
|
- lib/ms/mascot/spectrum.rb
|
87
87
|
- lib/ms/mascot/submit.rb
|
88
|
+
- lib/ms/mascot/validation.rb
|
88
89
|
- tap.yml
|
89
90
|
- README
|
90
91
|
- MIT-LICENSE
|