ms-mascot 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +9 -0
- data/lib/ms/mascot/dat.rb +16 -0
- data/lib/ms/mascot/dat/archive.rb +198 -0
- data/lib/ms/mascot/dat/header.rb +4 -0
- data/lib/ms/mascot/dat/index.rb +23 -0
- data/lib/ms/mascot/dat/masses.rb +4 -0
- data/lib/ms/mascot/dat/parameters.rb +4 -0
- data/lib/ms/mascot/dat/peptides.rb +4 -0
- data/lib/ms/mascot/dat/proteins.rb +4 -0
- data/lib/ms/mascot/dat/query.rb +12 -0
- data/lib/ms/mascot/dat/section.rb +86 -0
- data/lib/ms/mascot/dat/summary.rb +8 -0
- data/lib/ms/mascot/dat/summary/id.rb +54 -0
- data/lib/ms/mascot/export.rb +75 -10
- data/lib/ms/mascot/format_mgf.rb +54 -0
- data/lib/ms/mascot/fragment.rb +29 -25
- data/lib/ms/mascot/mgf.rb +35 -2
- data/lib/ms/mascot/mgf/entry.rb +23 -5
- data/lib/ms/mascot/spectrum.rb +18 -3
- data/lib/ms/mascot/submit.rb +120 -29
- data/tap.yml +0 -0
- metadata +29 -31
- data/cmd/generate_mgf.rb +0 -123
- data/cmd/generate_prospector_mgf.rb +0 -123
- data/cmd/reformat_mgf.rb +0 -90
- data/lib/ms/mascot/predict.rb +0 -94
data/History
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'ms/mascot/dat/archive'
|
2
|
+
|
3
|
+
module Ms
  module Mascot
    # Namespace for reading Mascot dat result files.
    module Dat
      # Convenience entry point: forwards filename and any block straight to
      # Ms::Mascot::Dat::Archive.open and returns whatever that call returns.
      def self.open(filename, &block)
        Archive.open(filename, &block)
      end
    end
  end
end
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'external'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
|
7
|
+
# Registry of (section_name, SectionClass) pairs. Every value starts out
# as the require path of the file that defines the section class; when a
# name is first resolved the path is required and the entry is replaced
# by the class looked up like:
#
#   Ms::Mascot::Dat.const_get(section_name.capitalize)
#
# Such that 'header' is mapped to Ms::Mascot::Dat::Header. The hash is
# deliberately left unfrozen because resolved classes are written back.
#
# currently unimplemented: unimod enzyme taxonomy mixture quantitation
CONTENT_TYPE_CLASSES =
  %w{header index masses parameters peptides proteins summary query}.inject({}) do |registry, section_name|
    registry[section_name] = "ms/mascot/dat/#{section_name}"
    registry
  end
|
21
|
+
|
22
|
+
# Provides access to a Mascot dat file.
|
23
|
+
class Archive < ExternalArchive
|
24
|
+
# Stateless helpers for reading the headers of a Mascot dat file. The
# methods are module functions, so they may be called as
# Utils.parse_metadata(io) or mixed into a class with include.
module Utils
  module_function

  # Parses a hash of metadata from the first two lines of io, with keys
  # :mime_version, :mascot_version, :content_type and :boundary.
  #
  # parse_metadata does not reposition io: the position io had on entry
  # is restored before returning, and also when parsing fails.
  #
  # Raises a RuntimeError if either line does not have the expected
  # format; errors raised by io.readline (for example at end of file)
  # propagate unchanged.
  def parse_metadata(io)
    current_pos = io.pos
    io.rewind

    begin
      metadata = {}
      line = io.readline
      unless line =~ /MIME-Version: (\d+\.\d+) \(Generated by Mascot version (\d+\.\d+)\)/
        raise "could not parse mime-version or mascot-version: #{line}"
      end
      metadata[:mime_version] = $1
      metadata[:mascot_version] = $2

      line = io.readline
      unless line =~ /Content-Type: (.*?); boundary=(.*)/
        raise "could not parse content-type: #{line}"
      end
      metadata[:content_type] = $1
      metadata[:boundary] = $2

      metadata
    ensure
      # restore the caller's position on every exit path
      io.pos = current_pos
    end
  end

  # Parses a mascot-style content type declaration and returns a hash
  # with :content_type and :section_name. The declaration may sit on any
  # line of str (the regexp is line-anchored). This method uses a simple
  # regexp and is very brittle, but it works for all known dat files.
  #
  # Raises a RuntimeError if no declaration is found.
  def parse_content_type(str)
    unless str =~ /^Content-Type: (.*?); name=\"(.*)\"/
      raise "unparseable content-type declaration: #{str.inspect}"
    end

    {:content_type => $1, :section_name => $2}
  end

  # Resolves a content type class from a hash of metadata like:
  #
  #   metadata = {
  #     :content_type => 'application/x-Mascot',
  #     :section_name => 'header'
  #   }
  #   content_type_class(metadata)   # => Ms::Mascot::Dat::Header
  #
  # Names of the form 'queryN' all resolve through the 'query' entry.
  # The first lookup of a name requires the registered path and caches
  # the class in CONTENT_TYPE_CLASSES.
  #
  # Raises an error if the content type is not 'application/x-Mascot'.
  # Returns nil if the name is not registered in CONTENT_TYPE_CLASSES.
  def content_type_class(metadata)
    unless metadata[:content_type] == 'application/x-Mascot'
      raise "unknown content_type: #{metadata.inspect}"
    end

    name = metadata[:section_name]
    name = 'query' if name =~ /^query(\d+)$/
    case const = CONTENT_TYPE_CLASSES[name]
    when String
      # still a require path: load it and swap in the class
      require const
      CONTENT_TYPE_CLASSES[name] = Dat.const_get(name.capitalize)
    else
      const
    end
  end
end
|
89
|
+
|
90
|
+
include Utils
|
91
|
+
|
92
|
+
# A hash of metadata associated with this dat file.
|
93
|
+
attr_reader :metadata
|
94
|
+
|
95
|
+
# Initializes the archive over io via super, parses the dat file metadata
# from io (see parse_metadata) and starts with no cached section names.
#
# NOTE(review): io_index is accepted but never forwarded to super --
# confirm whether ExternalArchive#initialize expects it.
# NOTE(review): parse_metadata is given the io argument as passed, so the
# nil default presumably fails on io.pos -- confirm.
def initialize(io=nil, io_index=nil)
|
96
|
+
super(io)
|
97
|
+
@metadata = parse_metadata(io)
|
98
|
+
@section_names = []
|
99
|
+
end
|
100
|
+
|
101
|
+
# The line separating sections: the boundary token recorded in the
# metadata prefixed with two dashes, typically
# '--gc0p4Jq0M2Yt08jU534c0p'.
def boundary
  token = metadata[:boundary]
  "--#{token}"
end
|
105
|
+
|
106
|
+
# Reindexes self by splitting io on the section boundary, then returns
# self. The block, if given, is forwarded to reindex_by_sep.
# NOTE(review): the meaning of the :entry_follows_sep / :exclude_sep
# options is defined by ExternalArchive#reindex_by_sep -- confirm there.
|
107
|
+
def reindex(&block)
|
108
|
+
@section_names.clear # cached names belong to the previous index
|
109
|
+
reindex_by_sep(boundary,
|
110
|
+
:entry_follows_sep => true,
|
111
|
+
:exclude_sep => true,
|
112
|
+
&block)
|
113
|
+
|
114
|
+
# remove the first and last entries, which contain
|
115
|
+
# the metadata and indicate the end of the multipart
|
116
|
+
# form data.
|
117
|
+
io_index.shift
|
118
|
+
io_index.pop
|
119
|
+
|
120
|
+
self
|
121
|
+
end
|
122
|
+
|
123
|
+
# Turns str into an entry using the content type header expected at the
# start of the string: when a section class is registered for the
# declared section, the result of its parse method is returned;
# otherwise str itself is returned unchanged.
def str_to_entry(str)
  section_class = content_type_class(parse_content_type(str))
  section_class ? section_class.parse(str) : str
end
|
132
|
+
|
133
|
+
# The section names corresponding to each entry in self.
#
# Section names are normally parsed lazily from the Content-Type header
# of an entry, as they are needed. With resolve true (the default) every
# outstanding name is parsed first; with resolve false the cache is
# returned as-is and may be only partially filled.
def section_names(resolve=true)
  return @section_names unless resolve

  resolve_sections
  @section_names
end
|
143
|
+
|
144
|
+
# Returns the entry for the named section, or nil if self has no section
# with that name (the same convention the query method follows).
def section(name)
  index = section_index(name)
  # never hand nil to self[]; an unknown name simply has no entry
  index.nil? ? nil : self[index]
end
|
148
|
+
|
149
|
+
# Returns the index of the first entry whose section name equals name,
# or nil when no entry matches. Entries are examined in order, so names
# are only parsed up to the match.
def section_index(name)
  (0...length).find { |index| section_name(index) == name }
end
|
156
|
+
|
157
|
+
# Returns the section name for the entry at index, parsing and caching
# it on first use.
def section_name(index)
  # negative indicies count from the end of @section_names, which only
  # has the same length as self once every section is resolved
  resolve_sections if index < 0

  cached = @section_names[index]
  return cached if cached

  @section_names[index] = parse_section_name(index)
end
|
165
|
+
|
166
|
+
# Calls block once for each query section, passing the entry looked up
# for every key listed by the 'index' section's queries method, in the
# order that method returns them.
def each_query(&block)
  query_keys = section('index').queries
  query_keys.each { |key| block.call(self.section(key)) }
end
|
171
|
+
|
172
|
+
# Returns the entry of the section named "query<num>", or nil if there
# is no such section.
def query(num)
  position = section_index("query#{num}")
  position ? self[position] : nil
end
|
179
|
+
|
180
|
+
private
|
181
|
+
|
182
|
+
# resolves the name of every section in self. Every index is visited
# rather than starting at @section_names.length: a lazy lookup of a late
# index pads the cache with nils, and those earlier slots would
# otherwise never be filled. Names already cached are not parsed again
# because section_name memoizes them.
def resolve_sections # :nodoc:
  0.upto(length - 1) do |index|
    section_name(index)
  end
end
|
188
|
+
|
189
|
+
# helper that moves io to the entry recorded at index in io_index, reads
# one line and returns the section name declared on it; returns nil when
# io_index has nothing at index. The read starts one byte past the
# recorded entry start (presumably skipping the line break that follows
# the boundary -- confirm against reindex).
def parse_section_name(index) # :nodoc:
  entry = io_index[index]
  return nil unless entry

  io.pos = entry[0] + 1
  parse_content_type(io.readline)[:section_name]
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# The 'index' section of a dat file. Its data hash holds one
# "query<N>" key per query section alongside the keys of the other
# sections.
class Ms::Mascot::Dat::Index < Ms::Mascot::Dat::Section

  # Matches the key of a query entry ("query1", "query2", ...); the
  # query number is captured in group 1.
  QUERY_KEY = /^query(\d+)$/

  # Returns the number of query keys in self, memoized after the first
  # call. Counts exactly the keys returned by queries.
  def nqueries
    @nqueries ||= queries.length
  end

  # Returns the value stored under the key "query<index>", or nil if
  # there is no such key.
  def query(index)
    data.fetch("query#{index}", nil)
  end

  # Returns the keys of all query sections ordered by query number, so
  # that "query2" precedes "query10".
  def queries
    data.keys.grep(QUERY_KEY).sort_by { |key| key[QUERY_KEY, 1].to_i }
  end

end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# A 'query<N>' section of a dat file.
class Ms::Mascot::Dat::Query < Ms::Mascot::Dat::Section

  # The query number taken from the section name: whatever follows the
  # first five characters of the stripped name, converted with to_i (so 0
  # when nothing numeric follows, as for the default name).
  attr_reader :index

  def initialize(data={}, section_name=self.class.section_name)
    super(data, section_name)
    number = section_name.strip[5..-1]
    @index = number.to_i
  end

end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
|
7
|
+
# Represents a 'section' section of a dat file, formatted like this:
#
#   Content-Type: application/x-Mascot; name="parameters"
#
#   LICENSE=Licensed to: Matrix Science Internal use only - Frill, (4 processors).
#   MP=
#   NM=
#   COM=Peptide Mass Fingerprint Example
#   IATOL=
#   ...
#
# Example from mascot data F981122.dat
class Section

  # Matches a content-type declaration plus any preceding/following
  # whitespace. The section name is captured in group 1.
  CONTENT_TYPE_REGEXP = /\s*Content-Type: application\/x-Mascot; name=\"(.*?)\"\n\s*/

  # A format string used to format parameters as a string.
  TO_S_FORMAT = "%s=%s\n"

  class << self

    # Parses a new instance from str. The lines after the content-type
    # declaration are parsed into the data hash; they follow a simple
    # "key=value\n" pattern, where the key is everything up to the first
    # '=' and the value is the rest of the line.
    #
    # Raises a RuntimeError if str does not begin (after optional
    # whitespace) with an application/x-Mascot content-type declaration.
    def parse(str)
      params = {}
      scanner = StringScanner.new(str)

      # skip whitespace and content type declaration
      unless scanner.scan(CONTENT_TYPE_REGEXP)
        # report the first non-blank line, which is where the declaration
        # was expected
        declaration = str.lstrip[/[^\n]*/]
        raise "unknown content type: #{declaration.inspect}"
      end
      section_name = scanner[1]

      # scan each pair.
      while key = scanner.scan(/[^=]+/)
        scanner.skip(/=/)
        params[key] = scanner.scan(/[^\n]*/)
        scanner.skip(/\n/)
      end

      new(params, section_name)
    end

    # Returns the name of the section represented by this class. Section
    # names are by default the downcase, unnested class name, for
    # example:
    #
    #   Ms::Mascot::Dat::Parameters.section_name  # => "parameters"
    #
    def section_name
      @section_name ||= to_s.split('::').last.downcase
    end
  end

  # A hash of data in self.
  attr_reader :data

  # The name of this section; defaults to the class section_name.
  attr_reader :section_name

  def initialize(data={}, section_name=self.class.section_name)
    @data = data
    @section_name = section_name
  end

  # Formats self as a string: two newlines, the content-type header, a
  # blank line, then one "key=value" line per entry of data.
  def to_s
    pairs = data.to_a.collect {|entry| TO_S_FORMAT % entry}.join
    "\n\nContent-Type: application/x-Mascot; name=\"#{section_name}\"\n\n#{pairs}"
  end
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# Summaries differ in their meaning depending on the type of search, but the
|
4
|
+
# content is in the same format. The best way to add a sensible API and to
|
5
|
+
# keep the basic archive lookup structure is to define modules that extend
|
6
|
+
# a summary with, say, an MS/MS ion search API.
|
7
|
+
class Ms::Mascot::Dat::Summary < Ms::Mascot::Dat::Section
|
8
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ms/mascot/dat/summary'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
class Summary
|
7
|
+
class Id < Ms::Mascot::Dat::Summary
|
8
|
+
# A value object holding the fields of one peptide hit. Values are kept
# as given (typically strings); the readers listed in CASTING convert on
# access.
class Peptide

  # Attribute names, in the order their values are given to new.
  PEPTIDE_ATTS = %w{
    ui0 calc_mr delta start end num_match seq rank ui8 score ui11 ui12 ui13 ui14 ui15 res_before res_after
  }.map {|v| v.to_sym }

  # Conversion applied by the reader of each listed attribute.
  CASTING = {
    :calc_mr => 'to_f',
    :delta => 'to_f',
    :start => 'to_i',
    :end => 'to_i',
    :num_match => 'to_i',
    :rank => 'to_i',
    :score => 'to_f'}

  class << self
    # Builds a peptide from two comma-separated strings: the values of
    # hit_string followed by those of hit_terms_string are assigned to
    # PEPTIDE_ATTS in order.
    def from_strs(hit_string, hit_terms_string)
      vals = hit_string.split(',')
      vals.push( *(hit_terms_string.split(',')) )
      self.new(*vals)
    end

    # Builds a peptide from a hash of (attribute name, value) pairs and
    # returns it. Keys may be strings or symbols; a key that is not in
    # PEPTIDE_ATTS raises an ArgumentError.
    def from_hash(hash)
      obj = self.new
      hash.each do |k,v|
        obj[k] = v
      end
      obj
    end
  end

  PEPTIDE_ATTS.each do |attribute|
    if cast_method = CASTING[attribute]
      attr_writer attribute
      # casting reader: an unset attribute reads as 0 or 0.0 because nil
      # responds to to_i and to_f
      define_method(attribute) do
        instance_variable_get("@#{attribute}").send(cast_method)
      end
    else
      attr_accessor attribute
    end
  end

  # Assigns values to PEPTIDE_ATTS positionally; attributes without a
  # value are left nil. Raises an ArgumentError if more values than
  # attributes are given.
  def initialize(*values)
    if values.length > PEPTIDE_ATTS.length
      raise ArgumentError, "too many values (#{values.length} for #{PEPTIDE_ATTS.length})"
    end

    PEPTIDE_ATTS.zip(values) do |attribute, value|
      instance_variable_set("@#{attribute}", value)
    end
  end

  # Returns the value of the named attribute, as its reader returns it.
  def [](attribute)
    send(checked_attribute(attribute))
  end

  # Sets the named attribute to value.
  def []=(attribute, value)
    instance_variable_set("@#{checked_attribute(attribute)}", value)
  end

  private

  # Returns attribute as a symbol, raising an ArgumentError unless it
  # names one of PEPTIDE_ATTS.
  def checked_attribute(attribute)
    name = attribute.to_sym
    unless PEPTIDE_ATTS.include?(name)
      raise ArgumentError, "unknown peptide attribute: #{attribute.inspect}"
    end
    name
  end

end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|