ms-mascot 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History ADDED
@@ -0,0 +1,9 @@
1
+ == 0.12.2 / 2009-02-23
2
+
3
+ Updated release utilizing Tap.
4
+
5
+ * Added/Updated tasks for predicting spectra
6
+ * Added Submit/Export tasks
7
+ * Added preliminary .dat support
8
+ * Fixed bugs for series like y0, b*
9
+ * Converted tests to MiniTest specs
@@ -0,0 +1,16 @@
1
+ require 'ms/mascot/dat/archive'
2
+
3
module Ms
  module Mascot
    module Dat
      # Opens filename as an Ms::Mascot::Dat::Archive. The filename and the
      # optional block are handed straight to Archive.open, and the result
      # of that call is returned.
      def self.open(filename, &block)
        Archive.open(filename, &block)
      end
    end
  end
end
15
+
16
+
@@ -0,0 +1,198 @@
1
+ require 'external'
2
+
3
+ module Ms
4
+ module Mascot
5
+ module Dat
6
+
7
+ # A hash of (section_name, SectionClass) pairs mapping section names
8
+ # to section class. Initially SectionClass may be a require path; if
9
+ # so the path is required and the class looked up like:
10
+ #
11
+ # Ms::Mascot::Dat.const_get(section_name.capitalize)
12
+ #
13
+ # Such that 'header' is mapped to Ms::Mascot::Dat::Header.
14
CONTENT_TYPE_CLASSES = {}

# Register the require path of every implemented section type. The hash is
# left unfrozen because resolved classes are written back into it later.
# currently unimplemented: unimod enzyme taxonomy mixture quantitation
%w{header index masses parameters peptides proteins summary query}.each_with_object(CONTENT_TYPE_CLASSES) do |name, registry|
  registry[name] = "ms/mascot/dat/#{name}"
end
21
+
22
+ # Provides access to a Mascot dat file.
23
+ class Archive < ExternalArchive
24
# Stateless helpers for reading the headers of a Mascot dat file. Because of
# module_function each method is callable as Utils.method and is also
# available as a private instance method of anything that includes Utils.
module Utils
  module_function

  # Parses a hash of metadata from the first two lines of io. The hash has
  # the keys :mime_version and :mascot_version (from the MIME-Version line)
  # and :content_type and :boundary (from the Content-Type line).
  #
  # io is rewound for parsing, but parse_metadata does not reposition io:
  # the position io had on entry is restored before returning, and also
  # when parsing fails.
  #
  # Raises a RuntimeError if either line cannot be parsed. Errors raised
  # by io.readline (for example on an empty io) propagate unchanged.
  def parse_metadata(io)
    current_pos = io.pos

    begin
      io.rewind
      metadata = {}

      line = io.readline
      unless line =~ /MIME-Version: (\d+\.\d+) \(Generated by Mascot version (\d+\.\d+)\)/
        raise "could not parse mime-version or mascot-version: #{line}"
      end
      metadata[:mime_version] = $1
      metadata[:mascot_version] = $2

      line = io.readline
      unless line =~ /Content-Type: (.*?); boundary=(.*)/
        raise "could not parse content-type: #{line}"
      end
      metadata[:content_type] = $1
      metadata[:boundary] = $2

      metadata
    ensure
      # restore the caller's position on success and on failure alike
      io.pos = current_pos
    end
  end

  # Parses a mascot-style content type declaration into a hash with the
  # keys :content_type and :section_name. This method uses a simple regexp
  # and is very brittle, but it works for all known dat files.
  #
  # Raises a RuntimeError if str does not contain a declaration.
  def parse_content_type(str)
    unless str =~ /^Content-Type: (.*?); name=\"(.*)\"/
      raise "unparseable content-type declaration: #{str.inspect}"
    end

    {:content_type => $1, :section_name => $2}
  end

  # Resolves a content type class from a hash of metadata like:
  #
  #   metadata = {
  #     :content_type => 'application/x-Mascot',
  #     :section_name => 'header'
  #   }
  #   content_type_class(metadata)   # => Ms::Mascot::Dat::Header
  #
  # Section names of the form 'queryN' are all looked up as 'query'. When
  # the registered value is still a require path (a String), the path is
  # required and the resolved class is stored back in CONTENT_TYPE_CLASSES.
  #
  # Raises a RuntimeError if the content type is not 'application/x-Mascot'.
  # Returns nil if the name is not registered in CONTENT_TYPE_CLASSES.
  def content_type_class(metadata)
    unless metadata[:content_type] == 'application/x-Mascot'
      raise "unknown content_type: #{metadata.inspect}"
    end

    name = metadata[:section_name]
    name = 'query' if name =~ /^query(\d+)$/

    const = CONTENT_TYPE_CLASSES[name]
    return const unless const.kind_of?(String)

    require const
    CONTENT_TYPE_CLASSES[name] = Dat.const_get(name.capitalize)
  end
end
89
+
90
+ include Utils
91
+
92
+ # A hash of metadata associated with this dat file.
93
+ attr_reader :metadata
94
+
95
# Sets up the archive over io and reads the file-level metadata from it.
# Only io is passed on to super; io_index is accepted but not used here.
# NOTE(review): parse_metadata receives the io argument as given, so a nil
# io is not supported by this constructor -- confirm against callers.
def initialize(io=nil, io_index=nil)
  super(io)
  @section_names = []
  @metadata = parse_metadata(io)
end
100
+
101
# The separator line between sections: the :boundary value from metadata
# prefixed with two dashes, typically '--gc0p4Jq0M2Yt08jU534c0p'.
def boundary
  marker = metadata[:boundary]
  "--#{marker}"
end
105
+
106
# Rebuilds the index of self by splitting on boundary and forgets all
# cached section names. The block, if any, is passed to reindex_by_sep.
# Returns self.
def reindex(&block)
  @section_names.clear
  reindex_by_sep(boundary, :entry_follows_sep => true, :exclude_sep => true, &block)

  # The first entry holds the metadata and the last one marks the end of
  # the multipart form data; neither is a section, so both are dropped.
  io_index.shift
  io_index.pop
  self
end
122
+
123
# Turns str into an entry. The content type header expected at the start
# of str selects the section class, which then parses str. When no class
# is found for the header, str itself is returned.
def str_to_entry(str)
  declaration = parse_content_type(str)
  section_class = content_type_class(declaration)
  return str unless section_class

  section_class.parse(str)
end
132
+
133
# The section names corresponding to each entry in self.
#
# Section names are normally parsed lazily from the Content-Type header
# of an entry, so the cache may be incomplete. With resolve set to true
# (the default) every name is parsed before the array is returned; with
# resolve set to false the array is returned as it currently stands and
# may be only partially filled.
def section_names(resolve=true)
  return @section_names unless resolve

  resolve_sections
  @section_names
end
143
+
144
# Returns the entry for the named section, or nil if self has no section
# with that name (the same convention query uses for a missing query).
def section(name)
  index = section_index(name)
  # section_index returns nil for an unknown name; do not index with nil
  index.nil? ? nil : self[index]
end
148
+
149
# Returns the index of the first entry whose section name equals name, or
# nil when no entry has that name. Names are looked up through
# section_name, one index at a time, starting from 0.
def section_index(name)
  (0...length).find {|position| section_name(position) == name }
end
156
+
157
# Returns the section name for the entry at index, parsing and caching it
# on first use.
def section_name(index)
  # all sections must be resolved for negative indices to work correctly
  # (since otherwise @section_names may not have the same length as self)
  resolve_sections if index < 0

  cached = @section_names[index]
  return cached if cached

  @section_names[index] = parse_section_name(index)
end
165
+
166
# Calls the block once for each query section, passing the section entry.
# The keys to visit come from the queries method of the 'index' section
# and each one is looked up with section. Returns the array of keys that
# was iterated, or an enumerator when no block is given.
def each_query(&block)
  return enum_for(:each_query) unless block

  section('index').queries.each do |key|
    block.call(section(key))
  end
end
171
+
172
# Returns the entry of the section named "query<num>", or nil when self
# has no such section.
def query(num)
  position = section_index("query#{num}")
  position ? self[position] : nil
end
179
+
180
+ private
181
+
182
# Resolves the name of every section. Every index is visited rather than
# only those past the end of @section_names: section_name(i) can be called
# in any order, which leaves nil holes before i that would otherwise never
# be filled. Names that are already cached are not parsed again.
def resolve_sections # :nodoc:
  0.upto(length - 1) do |index|
    section_name(index)
  end
end
188
+
189
# Helper that moves io to the entry at index and parses the section name
# from the line read there. Returns nil when io_index has no entry at
# index.
def parse_section_name(index) # :nodoc:
  entry = io_index[index]
  return nil unless entry

  # NOTE(review): the +1 presumably steps over the newline that follows
  # the boundary -- confirm against the index built by reindex.
  io.pos = entry[0] + 1
  header_line = io.readline
  parse_content_type(header_line)[:section_name]
end
195
+ end
196
+ end
197
+ end
198
+ end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'header' section of a dat file. Adds nothing to
# Ms::Mascot::Dat::Section.
class Ms::Mascot::Dat::Header < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,23 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
+ class Ms::Mascot::Dat::Index < Ms::Mascot::Dat::Section
4
+
5
# The number of keys in data that match /query/. The count is computed on
# the first call and memoized.
def nqueries
  @nqueries ||= data.keys.count {|key| key =~ /query/ }
end
8
+
9
+
10
# Returns the value stored in data under the key "query<index>", or nil
# when data has no such key. Uses a direct lookup rather than scanning
# every pair.
def query(index)
  data["query#{index}"]
end
17
+
18
# Returns the keys of all query sections (keys of the form "query<N>"),
# ordered by query number. A plain string sort would put 'query10' before
# 'query2', so the numeric suffix is used as the sort key.
def queries
  data.keys.grep(/^query(\d+)$/).sort_by {|key| key[/\d+\z/].to_i }
end
22
+
23
+ end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'masses' section of a dat file. Adds nothing to
# Ms::Mascot::Dat::Section.
class Ms::Mascot::Dat::Masses < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'parameters' section of a dat file. Adds nothing to
# Ms::Mascot::Dat::Section.
class Ms::Mascot::Dat::Parameters < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'peptides' section of a dat file. Adds nothing to
# Ms::Mascot::Dat::Section.
class Ms::Mascot::Dat::Peptides < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,4 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'proteins' section of a dat file. Adds nothing to
# Ms::Mascot::Dat::Section.
class Ms::Mascot::Dat::Proteins < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,12 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for a query section. In addition to what Section stores, a
# Query records the number that follows 'query' in its section name.
class Ms::Mascot::Dat::Query < Ms::Mascot::Dat::Section

  # The integer value of everything after the first five characters of the
  # stripped section name, e.g. 12 for 'query12' and 0 for 'query'.
  attr_reader :index

  def initialize(data={}, section_name=self.class.section_name)
    super(data, section_name)
    suffix = section_name.strip[5..-1]
    @index = suffix.to_i
  end
end
@@ -0,0 +1,86 @@
1
+ require 'strscan'
2
+
3
+ module Ms
4
+ module Mascot
5
+ module Dat
6
+
7
# Represents a section of a dat file, formatted like this:
#
#   Content-Type: application/x-Mascot; name="parameters"
#
#   LICENSE=Licensed to: Matrix Science Internal use only - Frill, (4 processors).
#   MP=
#   NM=
#   COM=Peptide Mass Fingerprint Example
#   IATOL=
#   ...
#
# Example from mascot data F981122.dat
class Section

  # Matches a content-type declaration plus any preceding/following
  # whitespace. The section name is captured in group 1.
  CONTENT_TYPE_REGEXP = /\s*Content-Type: application\/x-Mascot; name=\"(.*?)\"\n\s*/

  # A format string used to format parameters as a string.
  TO_S_FORMAT = "%s=%s\n"

  class << self

    # Parses a new instance from str. Everything after the content-type
    # declaration is parsed into the data hash; sections follow a simple
    # "key=value\n" pattern. The key is the text up to the first '=' and
    # the value is the rest of that line.
    #
    # Raises a RuntimeError if str does not begin (after optional
    # whitespace) with a content-type declaration.
    def parse(str)
      params = {}
      scanner = StringScanner.new(str)

      # skip whitespace and content type declaration
      unless scanner.scan(CONTENT_TYPE_REGEXP)
        # report only the first non-blank line; str may be an entire section
        first_line = str.lstrip[/[^\n]*/]
        raise "unknown content type: #{first_line.inspect}"
      end
      section_name = scanner[1]

      # scan each pair.
      while key = scanner.scan(/[^=]+/)
        scanner.skip(/=/)
        params[key] = scanner.scan(/[^\n]*/)
        scanner.skip(/\n/)
      end

      new(params, section_name)
    end

    # Returns the name of the section represented by this class. Section
    # names are by default the downcase, unnested class name, for
    # example:
    #
    #   Ms::Mascot::Dat::Parameters.section_name  # => "parameters"
    #
    def section_name
      @section_name ||= to_s.split('::').last.downcase
    end
  end

  # A hash of data in self.
  attr_reader :data

  # The section name of self; defaults to the class section_name.
  attr_reader :section_name

  def initialize(data={}, section_name=self.class.section_name)
    @data = data
    @section_name = section_name
  end

  # Formats self as a string with the content-type header: two newlines,
  # the declaration, a blank line, then one "key=value" line per pair.
  def to_s
    body = data.to_a.collect {|entry| TO_S_FORMAT % entry }.join
    %Q{\n\nContent-Type: application/x-Mascot; name="#{section_name}"\n\n#{body}}
  end
end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,8 @@
1
+ require 'ms/mascot/dat/section'
2
+
3
# Section class for the 'summary' section of a dat file. What a summary
# means depends on the type of search, although the format of its content
# does not. The best way to add a sensible api while keeping the basic
# archive lookup structure is to define modules that extend a summary
# with, say, an MS/MS ion search api.
class Ms::Mascot::Dat::Summary < Ms::Mascot::Dat::Section; end
@@ -0,0 +1,54 @@
1
+ require 'ms/mascot/dat/summary'
2
+
3
+ module Ms
4
+ module Mascot
5
+ module Dat
6
+ class Summary
7
+ class Id < Ms::Mascot::Dat::Summary
8
# A peptide hit described by the attributes in PEPTIDE_ATTS. Values are
# stored as given (normally strings); the readers of the attributes listed
# in CASTING convert the stored value each time they are called.
class Peptide

  # Attribute names, in the order in which positional values are assigned.
  PEPTIDE_ATTS = %w{
    ui0 calc_mr delta start end num_match seq rank ui8 score ui11 ui12 ui13 ui14 ui15 res_before res_after
  }.map {|v| v.to_sym }

  # Attribute name => name of the conversion method applied by its reader.
  CASTING = {
    :calc_mr => 'to_f',
    :delta => 'to_f',
    :start => 'to_i',
    :end => 'to_i',
    :num_match => 'to_i',
    :rank => 'to_i',
    :score => 'to_f'}

  class << self
    # Builds a peptide from two comma-separated strings. The values of
    # hit_string followed by the values of hit_terms_string are assigned
    # to PEPTIDE_ATTS in order.
    def from_strs(hit_string, hit_terms_string)
      vals = hit_string.split(',')
      vals.push( *(hit_terms_string.split(',')) )
      new(*vals)
    end

    # Builds a peptide from a hash of (attribute, value) pairs; keys may
    # be strings or symbols. Returns the new peptide. Raises an
    # ArgumentError for a key that is not in PEPTIDE_ATTS.
    def from_hash(hash)
      obj = new
      hash.each do |k, v|
        attribute = k.to_sym
        unless PEPTIDE_ATTS.include?(attribute)
          raise ArgumentError, "unknown peptide attribute: #{k.inspect}"
        end
        obj.send("#{attribute}=", v)
      end
      obj
    end
  end

  PEPTIDE_ATTS.each do |attribute|
    if cast_method = CASTING[attribute]
      attr_writer attribute
      # the reader casts on every call; an unset (nil) value casts to 0 or 0.0
      define_method(attribute) do
        instance_variable_get("@#{attribute}").send(cast_method)
      end
    else
      attr_accessor attribute
    end
  end

  # Assigns values to the attributes in PEPTIDE_ATTS, in order. Attributes
  # without a value are set to nil; values beyond the last attribute are
  # ignored.
  def initialize(*values)
    PEPTIDE_ATTS.zip(values) do |attribute, value|
      instance_variable_set("@#{attribute}", value)
    end
  end
end
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end