ms-mascot 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/History ADDED
@@ -0,0 +1,9 @@
1
+ == 0.12.2 / 2009-02-23
2
+
3
+ Updated release utilizing Tap.
4
+
5
+ * Added/Updated tasks for predicting spectra
6
+ * Added Submit/Export tasks
7
+ * Added preliminary .dat support
8
+ * Fixed bugs for series like y0, b*
9
+ * Converted tests to MiniTest specs
@@ -0,0 +1,16 @@
1
+ require 'ms/mascot/dat/archive'
2
+
3
module Ms
  module Mascot
    module Dat
      # Opens the dat file at filename by delegating to Archive.open.
      # The block is forwarded unchanged, so it is given the opened
      # Ms::Mascot::Dat::Archive object.
      def self.open(filename, &block)
        Archive.open(filename, &block)
      end
    end
  end
end
15
+
16
+
@@ -0,0 +1,198 @@
1
+ require 'external'
2
+
3
module Ms
  module Mascot
    module Dat

      # A hash of (section_name, SectionClass) pairs mapping section names
      # to section classes.  Initially SectionClass may be a require path; if
      # so, the path is required on first use and the class is looked up like:
      #
      #   Ms::Mascot::Dat.const_get(section_name.capitalize)
      #
      # such that 'header' is mapped to Ms::Mascot::Dat::Header.  The hash is
      # mutated as classes are resolved, so it must not be frozen.
      CONTENT_TYPE_CLASSES = {}

      # currently unimplemented: unimod enzyme taxonomy mixture quantitation
      %w{header index masses parameters peptides proteins summary query
      }.each do |section_name|
        CONTENT_TYPE_CLASSES[section_name] = "ms/mascot/dat/#{section_name}"
      end

      # Provides access to a Mascot dat file.
      class Archive < ExternalArchive
        module Utils
          module_function

          # Parses a hash of metadata (mime_version, mascot_version,
          # content_type, boundary) from the first two lines of io.
          #
          # The position of io is restored before returning, including when
          # a line cannot be parsed and an error is raised.
          def parse_metadata(io)
            current_pos = io.pos

            begin
              io.rewind
              metadata = {}

              line = io.readline
              unless line =~ /MIME-Version: (\d+\.\d+) \(Generated by Mascot version (\d+\.\d+)\)/
                raise "could not parse mime-version or mascot-version: #{line}"
              end
              metadata[:mime_version] = $1
              metadata[:mascot_version] = $2

              line = io.readline
              unless line =~ /Content-Type: (.*?); boundary=(.*)/
                raise "could not parse content-type: #{line}"
              end
              metadata[:content_type] = $1
              metadata[:boundary] = $2

              metadata
            ensure
              # honor the "does not reposition io" contract on every exit path
              io.pos = current_pos
            end
          end

          # Parses a mascot-style content type declaration into a hash with
          # :content_type and :section_name keys.  This method uses a simple
          # regexp and is very brittle, but it works for all known dat files.
          #
          # Raises an error if str does not contain such a declaration.
          def parse_content_type(str)
            unless str =~ /^Content-Type: (.*?); name=\"(.*)\"/
              raise "unparseable content-type declaration: #{str.inspect}"
            end

            {:content_type => $1, :section_name => $2}
          end

          # Resolves a content type class from a hash of metadata like:
          #
          #   metadata = {
          #     :content_type => 'application/x-Mascot',
          #     :section_name => 'header'
          #   }
          #   Archive::Utils.content_type_class(metadata)  # => Ms::Mascot::Dat::Header
          #
          # Section names of the form 'queryN' all resolve through the
          # 'query' entry.  Raises an error if the content type is not
          # 'application/x-Mascot'.  Returns nil if the section name is not
          # registered in CONTENT_TYPE_CLASSES.
          def content_type_class(metadata)
            unless metadata[:content_type] == 'application/x-Mascot'
              raise "unknown content_type: #{metadata.inspect}"
            end

            name = metadata[:section_name]
            name = 'query' if name =~ /^query(\d+)$/
            case const = CONTENT_TYPE_CLASSES[name]
            when String
              # still a require path: load it and cache the resolved class
              require const
              CONTENT_TYPE_CLASSES[name] = Dat.const_get(name.capitalize)
            else
              const
            end
          end
        end

        include Utils

        # A hash of metadata associated with this dat file.
        attr_reader :metadata

        # Initializes the archive on io and parses the file metadata.
        #
        # NOTE(review): io_index is accepted but not forwarded to super, and
        # the default io of nil cannot be parsed for metadata -- confirm both
        # are intended.
        def initialize(io=nil, io_index=nil)
          super(io)
          @metadata = parse_metadata(io)
          @section_names = []
        end

        # The boundary separating sections, typically '--gc0p4Jq0M2Yt08jU534c0p'.
        def boundary
          "--#{metadata[:boundary]}"
        end

        # Reindexes self by splitting on the boundary.  Clears any cached
        # section names and returns self.
        def reindex(&block)
          @section_names.clear
          reindex_by_sep(boundary,
            :entry_follows_sep => true,
            :exclude_sep => true,
          &block)

          # remove the first and last entries, which contain
          # the metadata and indicate the end of the multipart
          # form data.
          io_index.shift
          io_index.pop

          self
        end

        # Converts str into an entry according to the content type header
        # which should be present at the start of the string.  Returns str
        # itself when no class is registered for the section.
        def str_to_entry(str)
          if ctc = content_type_class(parse_content_type(str))
            ctc.parse(str)
          else
            str
          end
        end

        # The section names corresponding to each entry in self.
        #
        # Normally section names are lazily parsed from the Content-Type header
        # of an entry as needed.  If resolve is true, all section names are
        # parsed and then returned; otherwise section_names may return a
        # partially-filled array.
        def section_names(resolve=true)
          resolve_sections if resolve
          @section_names
        end

        # Returns the entry for the named section.
        #
        # NOTE(review): when no section matches, nil is passed to self[];
        # confirm how ExternalArchive handles that.
        def section(name)
          self[section_index(name)]
        end

        # Returns the index of the named section, or nil if there is none.
        def section_index(name)
          (0...length).find {|index| section_name(index) == name }
        end

        # Returns the section name for the entry at index.
        def section_name(index)
          # all sections must be resolved for negative indices to
          # work correctly (since otherwise @section_names may not
          # have the same length as self)
          resolve_sections if index < 0
          @section_names[index] ||= parse_section_name(index)
        end

        # Calls the block with the entry for each query section listed by
        # the 'index' section.
        def each_query(&block)
          section('index').queries.each do |key|
            block.call(section(key))
          end
        end

        # Returns the entry for section "query#{num}", or nil if there is
        # no such section.
        def query(num)
          si = section_index("query#{num}")
          si ? self[si] : nil
        end

        private

        # resolves each section name that has not been parsed yet
        def resolve_sections # :nodoc:
          (@section_names.length).upto(length - 1) do |index|
            section_name(index)
          end
        end

        # helper to go to the entry at index and parse the section name;
        # returns nil when io_index has no entry at index
        def parse_section_name(index) # :nodoc:
          entry = io_index[index]
          return nil unless entry

          # presumably the + 1 skips the newline left after the excluded
          # boundary, so readline yields the Content-Type line -- verify
          io.pos = entry[0] + 1
          parse_content_type(io.readline)[:section_name]
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'header' section of a dat file.  Adds no behavior
# of its own; everything is inherited from Section.
class Ms::Mascot::Dat::Header < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,23 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'index' section of a dat file, with helpers for
# the 'queryN' keys it contains.
class Ms::Mascot::Dat::Index < Ms::Mascot::Dat::Section

  # Matches keys that name a numbered query section, e.g. 'query12'.
  QUERY_KEY = /^query(\d+)$/

  # Returns the number of query keys in data.  The count is memoized, so
  # later changes to data are not reflected.
  def nqueries
    @nqueries ||= data.keys.grep(QUERY_KEY).length
  end

  # Returns the value stored under "query#{index}", or nil if data has no
  # such key.
  def query(index)
    data.fetch("query#{index}", nil)
  end

  # Returns the keys of all query sections, sorted as strings (so
  # 'query10' sorts before 'query2').
  def queries
    data.keys.grep(QUERY_KEY).sort
  end

end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'masses' section of a dat file.  Adds no behavior
# of its own; everything is inherited from Section.
class Ms::Mascot::Dat::Masses < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'parameters' section of a dat file.  Adds no
# behavior of its own; everything is inherited from Section.
class Ms::Mascot::Dat::Parameters < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'peptides' section of a dat file.  Adds no behavior
# of its own; everything is inherited from Section.
class Ms::Mascot::Dat::Peptides < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the 'proteins' section of a dat file.  Adds no behavior
# of its own; everything is inherited from Section.
class Ms::Mascot::Dat::Proteins < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,12 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Entry class for the numbered 'queryN' sections of a dat file.
class Ms::Mascot::Dat::Query < Ms::Mascot::Dat::Section

  # The integer read from whatever follows the first five characters of
  # the (stripped) section name, e.g. 12 for 'query12'; 0 when no digits
  # follow.
  attr_reader :index

  # Builds the section as Section does, then derives index from
  # section_name.
  def initialize(data={}, section_name=self.class.section_name)
    super
    suffix = section_name.strip.slice(5..-1)
    @index = suffix.to_i
  end

end
@@ -0,0 +1,86 @@
1
+ require 'strscan'
2
+
3
module Ms
  module Mascot
    module Dat

      # Represents a section of a dat file, formatted like this:
      #
      #   Content-Type: application/x-Mascot; name="parameters"
      #
      #   LICENSE=Licensed to: Matrix Science Internal use only - Frill, (4 processors).
      #   MP=
      #   NM=
      #   COM=Peptide Mass Fingerprint Example
      #   IATOL=
      #   ...
      #
      # Example from mascot data F981122.dat
      class Section

        # Matches a content-type declaration plus any preceding/following
        # whitespace.  The section name is captured in group 1.
        CONTENT_TYPE_REGEXP = /\s*Content-Type: application\/x-Mascot; name=\"(.*?)\"\n\s*/

        # A format string used to format one key/value pair as a line.
        TO_S_FORMAT = "%s=%s\n".freeze

        class << self

          # Parses a new instance from str.  The text after the content-type
          # declaration is parsed into the data hash; it is expected to
          # follow a simple "key=value\n" pattern.
          #
          # Raises a RuntimeError if str does not begin (after optional
          # whitespace) with a Mascot content-type declaration.
          def parse(str)
            scanner = StringScanner.new(str)

            # skip whitespace and content type declaration
            unless scanner.scan(CONTENT_TYPE_REGEXP)
              # report the first non-blank line so the offending header is visible
              first_line = str.lstrip[/\A[^\n]*/]
              raise "unknown content type: #{first_line.inspect}"
            end
            section_name = scanner[1]

            # scan each pair: everything up to '=' is the key, the rest of
            # the line is the value
            params = {}
            while key = scanner.scan(/[^=]+/)
              scanner.skip(/=/)
              params[key] = scanner.scan(/[^\n]*/)
              scanner.skip(/\n/)
            end

            new(params, section_name)
          end

          # Returns the name of the section represented by this class.
          # Section names are by default the downcased, unnested class name,
          # for example:
          #
          #   Ms::Mascot::Dat::Section.section_name  # => "section"
          #
          def section_name
            @section_name ||= to_s.split('::').last.downcase
          end
        end

        # A hash of data in self.
        attr_reader :data

        # The name of this section; defaults to the class section_name.
        attr_reader :section_name

        # Initializes a section holding data under the given section_name.
        def initialize(data={}, section_name=self.class.section_name)
          @data = data
          @section_name = section_name
        end

        # Formats self as a string: two newlines, the content-type header,
        # a blank line, then one "key=value" line per entry in data.
        def to_s
          pairs = data.to_a.collect {|entry| TO_S_FORMAT % entry }.join
          "\n\nContent-Type: application/x-Mascot; name=\"#{section_name}\"\n\n#{pairs}"
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
+ # Summaries differ in their meaning depending on the type of search but the
4
+ # content is in the same format. The best way to add a sensible api and to
5
+ # keep the basic archive lookup structure is to define modules that extend
6
+ # a summary with, say an MS/MS ion search api.
7
# Entry class for the 'summary' section of a dat file.  Currently an
# empty subclass: everything is inherited from Section.
class Ms::Mascot::Dat::Summary < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,54 @@
1
+ require 'ms/mascot/dat/summary'
2
+
3
module Ms
  module Mascot
    module Dat
      class Summary
        class Id < Ms::Mascot::Dat::Summary

          # A peptide hit whose attributes are stored as given (typically
          # strings) and cast to numbers on read where CASTING says so.
          class Peptide

            # Attribute names, in the positional order used by new and
            # from_strs.  NOTE(review): the 'uiN' names look like
            # placeholders for unidentified fields -- confirm.
            PEPTIDE_ATTS = %w{
              ui0 calc_mr delta start end num_match seq rank ui8 score ui11 ui12 ui13 ui14 ui15 res_before res_after
            }.map {|v| v.to_sym }

            # Conversion method applied by the reader of each listed attribute.
            CASTING = {
              :calc_mr => 'to_f',
              :delta => 'to_f',
              :start => 'to_i',
              :end => 'to_i',
              :num_match => 'to_i',
              :rank => 'to_i',
              :score => 'to_f'}

            class << self
              # Builds a peptide from two comma-separated strings.  The
              # fields of hit_string followed by those of hit_terms_string
              # are assigned positionally to PEPTIDE_ATTS.
              def from_strs(hit_string, hit_terms_string)
                vals = hit_string.split(',')
                vals.concat(hit_terms_string.split(','))
                new(*vals)
              end

              # Builds a peptide from a hash of (attribute, value) pairs;
              # keys may be strings or symbols.  Returns the new peptide.
              # Raises NoMethodError for a key that is not an attribute.
              def from_hash(hash)
                obj = new
                hash.each do |k, v|
                  obj.send("#{k}=", v)
                end
                obj
              end
            end

            # Assigns vals positionally to PEPTIDE_ATTS; attributes without
            # a value are left nil.  Raises ArgumentError if more values are
            # given than there are attributes.
            def initialize(*vals)
              if vals.length > PEPTIDE_ATTS.length
                raise ArgumentError, "too many values (#{vals.length} for #{PEPTIDE_ATTS.length})"
              end

              PEPTIDE_ATTS.zip(vals).each do |attribute, value|
                instance_variable_set("@#{attribute}", value)
              end
            end

            # Every attribute gets a plain writer; readers of attributes
            # listed in CASTING convert the stored value on each call (so an
            # unset attribute reads as 0 or 0.0), the rest return it as stored.
            PEPTIDE_ATTS.each do |attribute|
              if cast_method = CASTING[attribute]
                attr_writer attribute
                define_method(attribute) do
                  instance_variable_get("@#{attribute}").send(cast_method)
                end
              else
                attr_accessor attribute
              end
            end

          end
        end
      end
    end
  end
end