ms-mascot 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +9 -0
- data/lib/ms/mascot/dat.rb +16 -0
- data/lib/ms/mascot/dat/archive.rb +198 -0
- data/lib/ms/mascot/dat/header.rb +4 -0
- data/lib/ms/mascot/dat/index.rb +23 -0
- data/lib/ms/mascot/dat/masses.rb +4 -0
- data/lib/ms/mascot/dat/parameters.rb +4 -0
- data/lib/ms/mascot/dat/peptides.rb +4 -0
- data/lib/ms/mascot/dat/proteins.rb +4 -0
- data/lib/ms/mascot/dat/query.rb +12 -0
- data/lib/ms/mascot/dat/section.rb +86 -0
- data/lib/ms/mascot/dat/summary.rb +8 -0
- data/lib/ms/mascot/dat/summary/id.rb +54 -0
- data/lib/ms/mascot/export.rb +75 -10
- data/lib/ms/mascot/format_mgf.rb +54 -0
- data/lib/ms/mascot/fragment.rb +29 -25
- data/lib/ms/mascot/mgf.rb +35 -2
- data/lib/ms/mascot/mgf/entry.rb +23 -5
- data/lib/ms/mascot/spectrum.rb +18 -3
- data/lib/ms/mascot/submit.rb +120 -29
- data/tap.yml +0 -0
- metadata +29 -31
- data/cmd/generate_mgf.rb +0 -123
- data/cmd/generate_prospector_mgf.rb +0 -123
- data/cmd/reformat_mgf.rb +0 -90
- data/lib/ms/mascot/predict.rb +0 -94
data/History
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'ms/mascot/dat/archive'
|
2
|
+
|
3
|
+
module Ms
  module Mascot
    module Dat
      # Opens the dat file at +filename+ by delegating to Archive.open,
      # forwarding +filename+ and the block (if any) unchanged.  Per the
      # original note, the block is given the opened
      # Ms::Mascot::Dat::Archive object.
      def self.open(filename, &block)
        Archive.open(filename, &block)
      end
    end
  end
end
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'external'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
|
7
|
+
# Registry of (section_name, SectionClass) pairs.  Each value starts out
# as a require path string; on first use the path is required and the
# value is replaced by the class looked up as:
#
#   Ms::Mascot::Dat.const_get(section_name.capitalize)
#
# so that 'header' resolves to Ms::Mascot::Dat::Header.  The hash is
# mutated in place during resolution and therefore must stay unfrozen.
CONTENT_TYPE_CLASSES = {}

# currently unimplemented: unimod enzyme taxonomy mixture quantitation
%w[header index masses parameters peptides proteins summary query].each do |name|
  CONTENT_TYPE_CLASSES[name] = "ms/mascot/dat/#{name}"
end
|
21
|
+
|
22
|
+
# Provides access to a Mascot dat file.
|
23
|
+
class Archive < ExternalArchive
|
24
|
+
module Utils
  module_function

  # Parses a hash of file-level metadata from the first two lines of io:
  #
  #   :mime_version, :mascot_version   (from the MIME-Version line)
  #   :content_type, :boundary         (from the Content-Type line)
  #
  # io is rewound for reading, and its original position is restored
  # before returning -- including when a line cannot be parsed and an
  # error is raised, so the caller never observes a moved io.
  #
  # Raises a RuntimeError if either line does not match the expected
  # format.
  def parse_metadata(io)
    current_pos = io.pos

    begin
      io.rewind
      metadata = {}

      line = io.readline
      unless line =~ /MIME-Version: (\d+\.\d+) \(Generated by Mascot version (\d+\.\d+)\)/
        raise "could not parse mime-version or mascot-version: #{line}"
      end
      metadata[:mime_version] = $1
      metadata[:mascot_version] = $2

      line = io.readline
      unless line =~ /Content-Type: (.*?); boundary=(.*)/
        raise "could not parse content-type: #{line}"
      end
      metadata[:content_type] = $1
      metadata[:boundary] = $2

      metadata
    ensure
      # restore the position on both the success and the failure path
      io.pos = current_pos
    end
  end

  # Parses a mascot-style content type declaration such as
  #
  #   Content-Type: application/x-Mascot; name="header"
  #
  # into {:content_type => ..., :section_name => ...}.  This uses a
  # simple regexp and is very brittle, but it works for all known dat
  # files.  Raises a RuntimeError if str does not contain a declaration.
  def parse_content_type(str)
    unless str =~ /^Content-Type: (.*?); name=\"(.*)\"/
      raise "unparseable content-type declaration: #{str.inspect}"
    end

    {:content_type => $1, :section_name => $2}
  end

  # Resolves a content type class from a hash of metadata like:
  #
  #   metadata = {
  #     :content_type => 'application/x-Mascot',
  #     :section_name => 'header'
  #   }
  #   Dat.content_type_class(metadata)  # => Ms::Mascot::Dat::Header
  #
  # Section names of the form 'queryN' all resolve through the 'query'
  # entry.  Raises an error if the content type is not
  # 'application/x-Mascot'.  Returns nil (it does not raise) when the
  # name is not registered in CONTENT_TYPE_CLASSES.
  def content_type_class(metadata)
    unless metadata[:content_type] == 'application/x-Mascot'
      raise "unknown content_type: #{metadata.inspect}"
    end

    name = metadata[:section_name]
    name = 'query' if name =~ /^query(\d+)$/

    case const = CONTENT_TYPE_CLASSES[name]
    when String
      # deliberate lazy load: the registry holds a require path until
      # the section class is first needed, then caches the class itself
      require const
      CONTENT_TYPE_CLASSES[name] = Dat.const_get(name.capitalize)
    else
      const
    end
  end
end
|
89
|
+
|
90
|
+
include Utils
|
91
|
+
|
92
|
+
# A hash of metadata associated with this dat file.
|
93
|
+
attr_reader :metadata
|
94
|
+
|
95
|
+
# Builds the archive over io, starts with an empty section-name cache,
# and parses the file-level metadata from io via parse_metadata.
#
# NOTE(review): io_index is accepted but is not forwarded to super;
# confirm against ExternalArchive#initialize whether that is intended.
def initialize(io=nil, io_index=nil)
  super(io)
  @section_names = []
  @metadata = parse_metadata(io)
end
|
100
|
+
|
101
|
+
# The separator line between sections: the :boundary value from metadata
# prefixed with two dashes, typically '--gc0p4Jq0M2Yt08jU534c0p'.
def boundary
  '--' + metadata[:boundary].to_s
end
|
105
|
+
|
106
|
+
# Reindexes self by splitting on the boundary, discarding any cached
# section names first.  The block, if given, is forwarded to
# reindex_by_sep.  Returns self.
def reindex(&block)
  @section_names.clear
  reindex_by_sep(boundary, :entry_follows_sep => true, :exclude_sep => true, &block)

  # drop the first and last entries, which contain the metadata and
  # indicate the end of the multipart form data
  io_index.shift
  io_index.pop

  self
end
|
122
|
+
|
123
|
+
# Converts str into an entry using the content type header expected at
# the start of the string.  When a section class is resolved for the
# header, the result of its parse(str) is returned; otherwise str is
# returned unchanged.
def str_to_entry(str)
  section_class = content_type_class(parse_content_type(str))
  section_class ? section_class.parse(str) : str
end
|
132
|
+
|
133
|
+
# The section names corresponding to each entry in self.
#
# Section names are normally parsed lazily from an entry's Content-Type
# header.  With resolve true (the default) every section name is parsed
# before the array is returned; with resolve false the array is returned
# as-is and may be only partially filled.
def section_names(resolve=true)
  if resolve
    resolve_sections
  end
  @section_names
end
|
143
|
+
|
144
|
+
# Returns the entry for the named section, or nil when no section has
# that name (section_index returns nil in that case, and nil must not
# be passed on as an index).  This matches the behavior of query.
def section(name)
  index = section_index(name)
  return nil if index.nil?

  self[index]
end
|
148
|
+
|
149
|
+
# Returns the index of the first entry whose section name equals name,
# or nil if there is none.  Entries are checked in order from index 0.
def section_index(name)
  (0...length).find { |candidate| section_name(candidate) == name }
end
|
156
|
+
|
157
|
+
# Returns the section name for the entry at index, parsing and caching
# it on first access.
def section_name(index)
  # every section must be resolved before a negative index is used,
  # since otherwise @section_names may not have the same length as self
  resolve_sections if index < 0

  cached = @section_names[index]
  return cached if cached

  @section_names[index] = parse_section_name(index)
end
|
165
|
+
|
166
|
+
# Calls block once per query key listed by the 'index' section, passing
# the section looked up for that key.
def each_query(&block)
  query_keys = section('index').queries
  query_keys.each { |key| block.call(section(key)) }
end
|
171
|
+
|
172
|
+
# Returns the entry for the section named "query<num>", or nil if no
# such section exists.
def query(num)
  position = section_index("query#{num}")
  position ? self[position] : nil
end
|
179
|
+
|
180
|
+
private
|
181
|
+
|
182
|
+
# Resolves the section name of every entry.
#
# The scan always starts at 0 rather than at @section_names.length:
# section_name can be called for any index, so the cache may contain
# unresolved (nil) slots below its current length.  Already-cached
# names are returned by section_name without re-parsing.
def resolve_sections # :nodoc:
  0.upto(length - 1) do |index|
    section_name(index)
  end
end
|
188
|
+
|
189
|
+
# Helper that seeks io to the entry recorded at index in io_index and
# parses the section name from the line read there.  Returns nil when
# io_index has no entry at index.
def parse_section_name(index) # :nodoc:
  entry = io_index[index]
  return nil unless entry

  # NOTE(review): the +1 presumably skips the newline that follows the
  # boundary separator -- confirm against the reindex options.
  io.pos = entry[0] + 1
  parse_content_type(io.readline)[:section_name]
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# The 'index' section of a dat file.  Its data hash is keyed by section
# name; query sections appear under keys of the form "queryN".
class Ms::Mascot::Dat::Index < Ms::Mascot::Dat::Section

  # Returns the number of query keys ("queryN") in data.  The count is
  # memoized, and uses the same key pattern as queries so the two always
  # agree.
  def nqueries
    @nqueries ||= queries.length
  end

  # Returns the value stored under "query<index>", or nil if there is no
  # such key.
  def query(index)
    data["query#{index}"]
  end

  # Returns all query keys ordered by query number, so that "query2"
  # precedes "query10" (a plain string sort would reverse them).
  def queries
    data.keys.grep(/^query(\d+)$/).sort_by { |key| key[5..-1].to_i }
  end

end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# A query section.  In addition to the Section state, a query records
# the integer that follows the first five characters of its (stripped)
# section name, e.g. 12 for "query12".
class Ms::Mascot::Dat::Query < Ms::Mascot::Dat::Section

  # The numeric suffix of the section name (0 when there is none).
  attr_reader :index

  def initialize(data={}, section_name=self.class.section_name)
    super(data, section_name)
    suffix = section_name.strip[5..-1]
    @index = suffix.to_i
  end

end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
|
7
|
+
# Represents one section of a dat file, formatted like this:
#
#   Content-Type: application/x-Mascot; name="parameters"
#
#   LICENSE=Licensed to: Matrix Science Internal use only - Frill, (4 processors).
#   MP=
#   NM=
#   COM=Peptide Mass Fingerprint Example
#   IATOL=
#   ...
#
# Example from mascot data F981122.dat
class Section

  # Matches a content-type declaration plus any preceding/following
  # whitespace.  The section name is captured in group 1.
  CONTENT_TYPE_REGEXP = /\s*Content-Type: application\/x-Mascot; name=\"(.*?)\"\n\s*/

  # A format string used to format one key/value pair as a line.
  TO_S_FORMAT = "%s=%s\n"

  class << self

    # Parses a new instance from str.  The content-type declaration at
    # the start of str supplies the section name; everything after it
    # is read as "key=value\n" pairs into the data hash.
    #
    # Raises a RuntimeError if str does not begin with a Mascot
    # content-type declaration.
    def parse(str)
      scanner = StringScanner.new(str)

      # skip whitespace and content type declaration
      unless scanner.scan(CONTENT_TYPE_REGEXP)
        # report the start of the offending input; only a prefix is
        # shown because a section body can be very large
        raise "unknown content type: #{str[0, 80].inspect}"
      end
      section_name = scanner[1]

      # scan each pair: key up to '=', value up to end of line
      params = {}
      while key = scanner.scan(/[^=]+/)
        scanner.skip(/=/)
        params[key] = scanner.scan(/[^\n]*/)
        scanner.skip(/\n/)
      end

      new(params, section_name)
    end

    # Returns the name of the section represented by this class.  By
    # default this is the downcased, unnested class name, for example:
    #
    #   Ms::Mascot::Dat::Parameters.section_name  # => "parameters"
    #
    def section_name
      @section_name ||= to_s.split('::').last.downcase
    end
  end

  # A hash of data in self.
  attr_reader :data

  # The section name of this instance (defaults to the class section_name).
  attr_reader :section_name

  def initialize(data={}, section_name=self.class.section_name)
    @data = data
    @section_name = section_name
  end

  # Formats self as a string: blank lines, the content-type header, a
  # blank line, then one "key=value" line per data pair.
  def to_s
    %Q{

Content-Type: application/x-Mascot; name="#{section_name}"

#{data.to_a.collect {|entry| TO_S_FORMAT % entry}.join}}
  end
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'ms/mascot/dat/section'
|
2
|
+
|
3
|
+
# The 'summary' section of a dat file.
#
# What a summary means depends on the type of search, but its content
# always has the same format.  To offer a sensible API while keeping the
# basic archive lookup structure, search-specific behavior (for example
# an MS/MS ion search API) is meant to be supplied by modules that
# extend a summary, so this class adds nothing to Section itself.
class Ms::Mascot::Dat::Summary < Ms::Mascot::Dat::Section
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ms/mascot/dat/summary'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Mascot
|
5
|
+
module Dat
|
6
|
+
class Summary
|
7
|
+
class Id < Ms::Mascot::Dat::Summary
|
8
|
+
# A peptide hit.  Attributes are stored as given (typically strings
# split from a hit line); the readers listed in CASTING convert the
# stored value on access.
class Peptide

  # Attribute names, in the positional order used by new/from_strs.
  PEPTIDE_ATTS = %w{
    ui0 calc_mr delta start end num_match seq rank ui8 score ui11 ui12 ui13 ui14 ui15 res_before res_after
  }.map {|v| v.to_sym }

  # Conversion method applied by the reader of each listed attribute.
  CASTING = {
    :calc_mr => 'to_f',
    :delta => 'to_f',
    :start => 'to_i',
    :end => 'to_i',
    :num_match => 'to_i',
    :rank => 'to_i',
    :score => 'to_f'}

  class << self
    # Builds a peptide from two comma-separated strings; the values of
    # hit_string followed by those of hit_terms_string are assigned to
    # PEPTIDE_ATTS positionally.
    def from_strs(hit_string, hit_terms_string)
      vals = hit_string.split(',')
      vals.push( *(hit_terms_string.split(',')) )
      new(*vals)
    end

    # Builds a peptide from a hash of (attribute name, value) pairs and
    # returns the new peptide.  Keys may be strings or symbols; a key
    # that is not in PEPTIDE_ATTS raises ArgumentError.
    def from_hash(hash)
      obj = new
      hash.each do |k,v|
        obj[k.to_sym] = v
      end
      obj
    end
  end

  PEPTIDE_ATTS.each do |attribute|
    if cast_method = CASTING[attribute]
      attr_writer attribute
      # casting reader: converts the stored value on every access
      define_method(attribute) do
        instance_variable_get("@#{attribute}").send(cast_method)
      end
    else
      attr_accessor attribute
    end
  end

  # Assigns vals to PEPTIDE_ATTS in order.  Missing trailing values are
  # left nil; values beyond the last attribute are ignored.
  # NOTE(review): confirm whether surplus values should raise instead.
  def initialize(*vals)
    PEPTIDE_ATTS.zip(vals) do |attribute, value|
      instance_variable_set("@#{attribute}", value)
    end
  end

  # Returns the value of the named attribute through its reader (so
  # CASTING applies).  Raises ArgumentError for an unknown attribute.
  def [](attribute)
    send(checked_attribute(attribute))
  end

  # Stores value under the named attribute.  Raises ArgumentError for
  # an unknown attribute.
  def []=(attribute, value)
    instance_variable_set("@#{checked_attribute(attribute)}", value)
  end

  private

  # Returns attribute as a symbol after verifying it is in PEPTIDE_ATTS.
  def checked_attribute(attribute)
    name = attribute.to_sym
    unless PEPTIDE_ATTS.include?(name)
      raise ArgumentError, "unknown peptide attribute: #{attribute.inspect}"
    end
    name
  end
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|