mspire-mascot-dat 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +90 -15
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/mspire/mascot/dat.rb +181 -14
- data/lib/mspire/mascot/dat/cast.rb +31 -0
- data/lib/mspire/mascot/dat/header.rb +26 -0
- data/lib/mspire/mascot/dat/index.rb +42 -7
- data/lib/mspire/mascot/dat/masses.rb +22 -0
- data/lib/mspire/mascot/dat/parameters.rb +21 -0
- data/lib/mspire/mascot/dat/peptide.rb +70 -38
- data/lib/mspire/mascot/dat/protein.rb +8 -0
- data/lib/mspire/mascot/dat/query.rb +12 -17
- data/lib/mspire/mascot/dat/section/key_val.rb +18 -0
- data/spec/mspire/mascot/dat/index_spec.rb +17 -2
- data/spec/mspire/mascot/dat/peptide_spec.rb +48 -13
- data/spec/mspire/mascot/dat/query_spec.rb +2 -2
- data/spec/mspire/mascot/dat_spec.rb +150 -50
- data/spec/spec_helper.rb +17 -1
- metadata +8 -19
- data/mspire-mascot-dat.gemspec +0 -70
data/README.md
CHANGED
@@ -1,45 +1,120 @@
|
|
1
1
|
# mspire-mascot-dat
|
2
2
|
|
3
|
-
|
3
|
+
Access mascot search engine .dat results file.
|
4
4
|
|
5
|
-
|
5
|
+
* Simple interface
|
6
|
+
* Lazy reading from IO object
|
7
|
+
* Object access of key data types
|
8
|
+
* Data casts where appropriate
|
9
|
+
|
10
|
+
Pull requests (or requests for features) gladly accepted.
|
11
|
+
|
12
|
+
[API of latest version](http://rubydoc.info/gems/mspire-mascot-dat)
|
13
|
+
|
14
|
+
## Synopsis
|
15
|
+
|
16
|
+
A Dat object reads information off an open IO object as lazily as possible.
|
17
|
+
The sections can be accessed like a hash.
|
6
18
|
|
7
19
|
```ruby
|
8
20
|
require 'mspire-mascot-dat'
|
9
21
|
|
10
22
|
Mspire::Mascot::Dat.open(file.dat) do |dat|
|
11
|
-
dat.
|
23
|
+
dat.keys # (or dat.sections) => [:parameters, :masses, ...]
|
24
|
+
|
25
|
+
dat[:peptides].each do |peptide|
|
26
|
+
# or: dat.each_peptide {|peptide| ... }
|
27
|
+
# data is properly cast
|
28
|
+
peptide.delta # => a Float
|
29
|
+
peptide.missed_cleavages # => an Integer
|
30
|
+
end
|
31
|
+
|
32
|
+
dat[:queries].each do |query|
|
33
|
+
query.title # => a String (unescaped)
|
34
|
+
end
|
35
|
+
|
36
|
+
dat[:proteins].each do |protein|
|
37
|
+
protein.accession
|
38
|
+
end
|
39
|
+
|
40
|
+
# or random query access
|
41
|
+
dat.query(22) # returns query #22
|
42
|
+
|
43
|
+
# sections with uppercase params are typically accessed by string
|
44
|
+
params = dat[:parameters]
|
45
|
+
params['CHARGE'] # => an Integer
|
46
|
+
|
47
|
+
# sections with lowercase params are accessed by symbol
|
48
|
+
header = dat[:header]
|
49
|
+
header[:sequences] # => an Integer
|
50
|
+
|
51
|
+
# sections that aren't normal key/value pairs are returned as a String
|
52
|
+
dat[:unimod] # => a String containing lots of XML
|
53
|
+
dat[:enzyme] # => a String with enzyme data
|
12
54
|
end
|
55
|
+
```
|
56
|
+
|
57
|
+
Note that no support is given for accessing the 'summary' sections because they are often incomplete for large files anyway and the information can all be found by accessing the other sections.
|
13
58
|
|
59
|
+
### Enumerable information
|
60
|
+
|
61
|
+
Sections with enumerable objects may be accessed as each_<whatever> or with
|
62
|
+
Dat#[], which returns an enumerable. So, these are equivalent:
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
dat.each_peptide {|pep| ... }
|
66
|
+
dat[:peptides].each {|pep| ... }
|
67
|
+
|
68
|
+
# these also are equivalent (return an enumerator)
|
69
|
+
enumerator = dat.each_peptide
|
70
|
+
enumerator = dat[:peptides]
|
14
71
|
```
|
15
|
-
### each peptide
|
16
72
|
|
17
|
-
|
73
|
+
Enumerators for some objects will have additional parameters that may be passed in (to either method style). For instance, the user may retrieve the top **n** peptide hits:
|
18
74
|
|
19
75
|
```ruby
|
20
|
-
dat.each_peptide
|
21
|
-
|
22
|
-
|
23
|
-
|
76
|
+
dat.each_peptide(1) {|peptide| ... } # only top peptide hits
|
77
|
+
```
|
78
|
+
|
79
|
+
### Queries
|
80
|
+
|
81
|
+
In a dat file, each query is its own section, but this makes them fairly
|
82
|
+
awkward to access. We treat them as if they were grouped into a single
|
83
|
+
section.
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
dat[:queries].each do |query|
|
87
|
+
# hash or method access
|
88
|
+
query[:charge] # => a positive or negative Integer
|
89
|
+
query.charge
|
90
|
+
query.Ions1 # or query.peaks
|
24
91
|
end
|
25
92
|
```
|
26
93
|
|
27
|
-
|
94
|
+
But they can also be accessed by query number:
|
28
95
|
|
29
96
|
```ruby
|
30
|
-
dat.
|
97
|
+
dat.query(23) # return query23
|
31
98
|
```
|
32
99
|
|
33
|
-
|
100
|
+
### Decoys
|
101
|
+
|
102
|
+
Decoy peptides may be accessed a few different ways, all of which are equivalent:
|
34
103
|
|
35
104
|
```ruby
|
36
|
-
dat.each_peptide(
|
105
|
+
dat.each_peptide(false) {|peptide| ... }
|
106
|
+
dat[:peptides, false].each {|peptide| ... }
|
107
|
+
dat.each_decoy_peptide {|peptide| ... }
|
108
|
+
dat[:decoy_peptides].each {|peptide| ... }
|
37
109
|
```
|
38
110
|
|
39
111
|
## Further Info
|
40
112
|
|
41
|
-
See
|
42
|
-
|
113
|
+
See the specs for additional examples.
|
114
|
+
|
115
|
+
Also, see Mascot's "Installation & Setup Manual" for detailed information
|
116
|
+
about the .dat format itself (can be accessed from the mascot main page
|
117
|
+
of whichever mascot you are using).
|
43
118
|
|
44
119
|
## Copyright
|
45
120
|
|
data/Rakefile
CHANGED
@@ -13,7 +13,6 @@ Jeweler::Tasks.new do |gem|
|
|
13
13
|
gem.description = %Q{Reads mascot dat files with gusto for mspire library.}
|
14
14
|
gem.email = "jtprince@gmail.com"
|
15
15
|
gem.authors = ["John T. Prince"]
|
16
|
-
gem.add_dependency "elif", "~> 0.1.0"
|
17
16
|
gem.add_development_dependency "rspec", "~> 2.8.0"
|
18
17
|
gem.add_development_dependency "rdoc", "~> 3.12"
|
19
18
|
gem.add_development_dependency "jeweler", "~> 1.8.4"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/lib/mspire/mascot/dat.rb
CHANGED
@@ -1,25 +1,169 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
|
2
|
+
%w(
|
3
|
+
index
|
4
|
+
peptide
|
5
|
+
query
|
6
|
+
protein
|
7
|
+
parameters
|
8
|
+
header masses
|
9
|
+
).each do |subsection|
|
10
|
+
require "mspire/mascot/dat/#{subsection}"
|
11
|
+
end
|
4
12
|
|
5
13
|
module Mspire
|
6
14
|
module Mascot
|
7
15
|
class Dat
|
16
|
+
|
17
|
+
class << self
|
18
|
+
|
19
|
+
# reads each line from a section until reaching the end of the section
|
20
|
+
def each_line(io, &block)
|
21
|
+
return to_enum(__method__, io) unless block
|
22
|
+
io.each_line do |line|
|
23
|
+
break if line[0,2] == '--'
|
24
|
+
block.call(line)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# returns the key and value for KEY=VAL sections
|
29
|
+
def each_key_val(io, &block)
|
30
|
+
return to_enum(__method__, io) unless block
|
31
|
+
each_line(io) do |line|
|
32
|
+
line.chomp!
|
33
|
+
(key, val) = line.split('=',2)
|
34
|
+
block.call( [key, (val=='' ? nil : val)] )
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def string(io, &block)
|
39
|
+
each_line(io).to_a.join
|
40
|
+
end
|
41
|
+
|
42
|
+
# returns the string after stripping off leading and trailing double
|
43
|
+
# quotation marks
|
44
|
+
def strip_quotes(string)
|
45
|
+
string.gsub(/\A"|"\Z/, '')
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
def open(file, index_file=false, &block)
|
50
|
+
io = File.open(file)
|
51
|
+
response = block.call(self.new(io, index_file))
|
52
|
+
io.close
|
53
|
+
response
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
|
8
59
|
# the io object which is the open dat file
|
9
60
|
attr_accessor :io
|
10
61
|
|
11
62
|
# the index object which points to the start byte for each section
|
12
63
|
attr_accessor :index
|
13
64
|
|
14
|
-
|
65
|
+
# if index_file is true, will attempt to use a written index file
|
66
|
+
# based on naming conventions; if one doesn't yet exist it will create
|
67
|
+
# one for the next usage. If handed a String, will consider it the
|
68
|
+
# index filename for reading or writing depending on whether it exists.
|
69
|
+
def initialize(io, index_file=false)
|
15
70
|
@io = io
|
16
|
-
|
71
|
+
index_filename =
|
72
|
+
case index_file
|
73
|
+
when String then index_file
|
74
|
+
when TrueClass then Dat::Index.index_filename(io.path)
|
75
|
+
else
|
76
|
+
nil
|
77
|
+
end
|
78
|
+
@index = Index.new
|
79
|
+
if index_filename && File.exist?(index_filename)
|
80
|
+
@index.from_byteindex!(index_filename)
|
81
|
+
else
|
82
|
+
@index.from_io!(@io)
|
83
|
+
end
|
84
|
+
|
85
|
+
if index_filename && !File.exist?(index_filename)
|
86
|
+
@index.write(index_filename)
|
87
|
+
end
|
17
88
|
end
|
18
89
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
90
|
+
# the universal way to access information
|
91
|
+
# returns the section with appropriate cast (if available) or as a
|
92
|
+
# String object with the information. nil if it doesn't exist. Also
|
93
|
+
# responds to :queries by calling #each_query. An enumerator is returned
|
94
|
+
# for enumerable objects.
|
95
|
+
#
|
96
|
+
# dat.section(:header) # => a Dat::Header object (hash-like)
|
97
|
+
# dat.section(:peptides) # => an Enumerator for peptides
|
98
|
+
# dat.section(:peptides, 1) # => an Enumerator for top peptides
|
99
|
+
# dat[:peptides, 1].each {|peptide| ... <top peptide> }
|
100
|
+
# # the equivalent each_<whatever> method:
|
101
|
+
# dat.each_peptide(1) {|peptide| ... <top peptide> }
|
102
|
+
#
|
103
|
+
# # aliased with #[] for bracket access:
|
104
|
+
# dat[:header]
|
105
|
+
# dat[:peptides, 1]
|
106
|
+
# ...
|
107
|
+
#
|
108
|
+
def section(*args)
|
109
|
+
# If the name exists as a class, then try to call the from_io method
|
110
|
+
# on the class (e.g., Parameters.from_io(io)). If the name is a
|
111
|
+
# plural, try the singular and the ::each method on the singular class
|
112
|
+
# (e.g., Peptide::each).
|
113
|
+
name = args.first.to_sym
|
114
|
+
capitalized = name.to_s.capitalize
|
115
|
+
maybe_singular =
|
116
|
+
case capitalized
|
117
|
+
when 'Queries'
|
118
|
+
'query'
|
119
|
+
else
|
120
|
+
start_section!(name)
|
121
|
+
capitalized[0...-1]
|
122
|
+
end
|
123
|
+
maybe_iterator = "each_#{maybe_singular.downcase}".to_sym
|
124
|
+
if self.respond_to?(maybe_iterator)
|
125
|
+
self.send(maybe_iterator, *args[1..-1])
|
126
|
+
elsif Mspire::Mascot::Dat.const_defined?(capitalized)
|
127
|
+
klass = Mspire::Mascot::Dat.const_get(capitalized)
|
128
|
+
obj = klass.new
|
129
|
+
if obj.respond_to?(:from_io!)
|
130
|
+
case name
|
131
|
+
when :parameters, :masses
|
132
|
+
obj.send(:from_io!, @io, false)
|
133
|
+
else
|
134
|
+
obj.send(:from_io!, @io)
|
135
|
+
end
|
136
|
+
else
|
137
|
+
nil
|
138
|
+
end
|
139
|
+
#elsif Mspire::Mascot::Dat.const_defined?(maybe_singular)
|
140
|
+
# klass = Mspire::Mascot::Dat.const_get(maybe_singular)
|
141
|
+
# klass.send(:each, @io, &block)
|
142
|
+
elsif @index.byte_num.key?(name)
|
143
|
+
Mspire::Mascot::Dat.string(@io)
|
144
|
+
else
|
145
|
+
nil
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
alias_method :[], :section
|
150
|
+
|
151
|
+
def each_protein(&block)
|
152
|
+
return to_enum(__method__) unless block
|
153
|
+
start_section!(:proteins)
|
154
|
+
Dat.each_key_val(@io) do |key, val|
|
155
|
+
(mw_s, desc) = val.split(',', 2)
|
156
|
+
block.call(Dat::Protein.new(Dat.strip_quotes(key), mw_s.to_f, Dat.strip_quotes(desc)))
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def each_query(&block)
|
161
|
+
return to_enum(__method__) unless block
|
162
|
+
@index.query_nums.each do |query_num|
|
163
|
+
byte = @index.query_num_to_byte[query_num]
|
164
|
+
@io.pos = byte
|
165
|
+
block.call( Mspire::Mascot::Dat::Query.new.from_io!(@io) )
|
166
|
+
end
|
23
167
|
end
|
24
168
|
|
25
169
|
# positions io at the beginning of the section data (past the Content
|
@@ -30,19 +174,40 @@ module Mspire
|
|
30
174
|
self
|
31
175
|
end
|
32
176
|
|
177
|
+
# returns query number n (these are NOT zero indexed)
|
33
178
|
def query(n)
|
34
179
|
start_section!(n)
|
35
|
-
Query.from_io(@io)
|
180
|
+
Query.new.from_io!(@io)
|
36
181
|
end
|
37
182
|
|
38
|
-
|
39
|
-
|
183
|
+
# optional parameters, passed in any order:
|
184
|
+
#
|
185
|
+
# top_n: [Float::INFINITY] a Numeric (top N hits)
|
186
|
+
# non_decoy: [true] a Boolean
|
187
|
+
#
|
188
|
+
# Returns the top_n hits. If non_decoy is false or nil, returns the
|
189
|
+
# decoy hits.
|
190
|
+
#
|
191
|
+
# each_peptide(false, 1) # top decoy peptide hit
|
192
|
+
# each_peptide(2, true) # top 2 peptide hits per query
|
193
|
+
# each_peptide(1) # top peptide hit per query
|
194
|
+
def each_peptide(*args, &block)
|
195
|
+
return to_enum(__method__, *args) unless block
|
196
|
+
(numeric, boolean) = args.partition {|arg| arg.is_a?(Numeric) }
|
197
|
+
top_n = numeric.first || Float::INFINITY
|
198
|
+
non_decoy = ((boolean.size > 0) ? boolean.first : true)
|
40
199
|
start_section!(non_decoy ? :peptides : :decoy_peptides)
|
41
|
-
|
42
|
-
|
200
|
+
Mspire::Mascot::Dat::Peptide.each(@io) do |peptide|
|
201
|
+
if peptide.peptide_num <= top_n
|
202
|
+
block.call(peptide)
|
203
|
+
end
|
43
204
|
end
|
44
205
|
end
|
45
206
|
|
207
|
+
def each_decoy_peptide(top_n=Float::INFINITY, &block)
|
208
|
+
each_peptide(false, top_n, &block)
|
209
|
+
end
|
210
|
+
|
46
211
|
# returns a list of all sections as symbols. The symbol :queries is
|
47
212
|
# returned rather than each query individually if there is 1 or more
|
48
213
|
# queries.
|
@@ -54,6 +219,8 @@ module Mspire
|
|
54
219
|
reply.map(&:to_sym)
|
55
220
|
end
|
56
221
|
|
222
|
+
alias_method :keys, :sections
|
223
|
+
|
57
224
|
end
|
58
225
|
end
|
59
226
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
module Mspire
|
3
|
+
module Mascot
|
4
|
+
class Dat
|
5
|
+
module Cast
|
6
|
+
TO_INT_ARRAY = ->(val) { val.split(',').map(&:to_i) }
|
7
|
+
FROM_CHARGE_STRING = ->(st) { (st[-1] << st[0...-1]).to_i }
|
8
|
+
CGI_UNESCAPE = ->(st) { CGI.unescape(st) }
|
9
|
+
FLOAT_PAIRS = ->(st) { st.split(',').map {|pair_s| pair_s.split(':').map(&:to_f) } }
|
10
|
+
end
|
11
|
+
|
12
|
+
module Castable
|
13
|
+
# expects a hash with the parameter and the way to cast it as a symbol
|
14
|
+
# (e.g., :to_f or a lambda). If no hash given, will attempt to
|
15
|
+
# retrieve a class constant 'CAST' which defines the casts.
|
16
|
+
def cast!(cast_hash=nil)
|
17
|
+
hash = cast_hash || self.class.const_get('CAST')
|
18
|
+
self.each_pair do |k,v|
|
19
|
+
if cast=hash[k]
|
20
|
+
apply = cast.is_a?(Symbol) ? cast.to_proc : cast
|
21
|
+
self[k] = apply[v] if apply
|
22
|
+
end
|
23
|
+
end
|
24
|
+
self
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'mspire/mascot/dat/section/key_val'
|
2
|
+
require 'mspire/mascot/dat/cast'
|
3
|
+
|
4
|
+
module Mspire
|
5
|
+
module Mascot
|
6
|
+
class Dat
|
7
|
+
# The header is a hash with some casting (see CAST) and is
|
8
|
+
# accessible with lowercase Symbol keys.
|
9
|
+
class Header < Hash
|
10
|
+
include Section::KeyVal
|
11
|
+
include Castable
|
12
|
+
|
13
|
+
CAST = {
|
14
|
+
sequences: :to_i,
|
15
|
+
sequences_after_tax: :to_i,
|
16
|
+
residues: :to_i,
|
17
|
+
distribution: Cast::TO_INT_ARRAY,
|
18
|
+
distribution_decoy: Cast::TO_INT_ARRAY,
|
19
|
+
queries: :to_i,
|
20
|
+
max_hits: :to_i,
|
21
|
+
}
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'json'
|
2
2
|
|
3
3
|
module Mspire
|
4
4
|
module Mascot
|
@@ -6,8 +6,16 @@ module Mspire
|
|
6
6
|
# makes a byte index (not line index)
|
7
7
|
class Index
|
8
8
|
|
9
|
+
INDEX_EXT = '.byteindex'
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def index_filename(file)
|
13
|
+
file + Dat::INDEX_EXT
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
9
17
|
# the hash holding the start byte for each section (besides the
|
10
|
-
# queries)
|
18
|
+
# queries). Keyed by symbol.
|
11
19
|
attr_accessor :byte_num
|
12
20
|
|
13
21
|
# the array holding the start byte for each query. It is indexed by
|
@@ -17,11 +25,13 @@ module Mspire
|
|
17
25
|
# an array of the query nums
|
18
26
|
attr_accessor :query_nums
|
19
27
|
|
20
|
-
|
28
|
+
|
29
|
+
# creates an empty index; fill it with #from_io! (scan a dat file) or
|
30
|
+
# #from_byteindex! (load a previously written index file)
|
31
|
+
def initialize
|
21
32
|
@byte_num = {}
|
22
33
|
@query_num_to_byte = []
|
23
34
|
@query_nums = []
|
24
|
-
from_io(io) if io
|
25
35
|
end
|
26
36
|
|
27
37
|
def has_queries?
|
@@ -29,7 +39,32 @@ module Mspire
|
|
29
39
|
end
|
30
40
|
|
31
41
|
# returns self
|
32
|
-
def
|
42
|
+
def from_byteindex!(filename)
|
43
|
+
hash = JSON.parse!( IO.read(filename) )
|
44
|
+
[:byte_num, :query_num_to_byte, :query_nums].each do |key|
|
45
|
+
self.send("#{key}=", hash[key.to_s])
|
46
|
+
end
|
47
|
+
@byte_num.keys.each {|k| @byte_num[k.to_sym] = @byte_num.delete(k) }
|
48
|
+
self
|
49
|
+
end
|
50
|
+
|
51
|
+
def write(filename)
|
52
|
+
File.open(filename,'w') do |io|
|
53
|
+
JSON.dump(
|
54
|
+
{
|
55
|
+
byte_num: byte_num,
|
56
|
+
query_num_to_byte: query_num_to_byte,
|
57
|
+
query_nums: query_nums,
|
58
|
+
}, io)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def from_file!(filename)
|
63
|
+
File.open(filename) {|io| from_io!(io) }
|
64
|
+
end
|
65
|
+
|
66
|
+
# returns self
|
67
|
+
def from_io!(io)
|
33
68
|
io.rewind
|
34
69
|
while line=io.gets
|
35
70
|
io.each_line do |line|
|
@@ -43,7 +78,7 @@ module Mspire
|
|
43
78
|
@query_nums << query_num
|
44
79
|
@query_num_to_byte[query_num] = pos
|
45
80
|
else
|
46
|
-
@byte_num[head] = pos
|
81
|
+
@byte_num[head.to_sym] = pos
|
47
82
|
end
|
48
83
|
end
|
49
84
|
end
|
@@ -63,7 +98,7 @@ module Mspire
|
|
63
98
|
if key.is_a?(Integer)
|
64
99
|
@query_num_to_byte[key]
|
65
100
|
else
|
66
|
-
@byte_num[key.
|
101
|
+
@byte_num[key.to_sym]
|
67
102
|
end
|
68
103
|
end
|
69
104
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'mspire/mascot/dat/section/key_val'
|
3
|
+
#require 'mspire/mascot/dat/cast'
|
4
|
+
|
5
|
+
module Mspire
|
6
|
+
module Mascot
|
7
|
+
class Dat
|
8
|
+
class Masses < Hash
|
9
|
+
include Section::KeyVal
|
10
|
+
#include Castable
|
11
|
+
|
12
|
+
#CAST = {
|
13
|
+
# 'TOL' => :to_f,
|
14
|
+
# 'ITOL' => :to_f,
|
15
|
+
# 'PFA' => :to_i,
|
16
|
+
# 'CHARGE' => Cast::FROM_CHARGE_STRING,
|
17
|
+
#}
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'mspire/mascot/dat/section/key_val'
|
2
|
+
require 'mspire/mascot/dat/cast'
|
3
|
+
|
4
|
+
module Mspire
|
5
|
+
module Mascot
|
6
|
+
class Dat
|
7
|
+
class Parameters < Hash
|
8
|
+
include Section::KeyVal
|
9
|
+
include Castable
|
10
|
+
|
11
|
+
CAST = {
|
12
|
+
'TOL' => :to_f,
|
13
|
+
'ITOL' => :to_f,
|
14
|
+
'PFA' => :to_i,
|
15
|
+
'CHARGE' => Cast::FROM_CHARGE_STRING,
|
16
|
+
}
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -1,55 +1,87 @@
|
|
1
|
+
require 'mspire/mascot/dat/cast'
|
1
2
|
|
2
3
|
module Mspire
|
3
4
|
module Mascot
|
4
5
|
class Dat
|
5
6
|
# mr = relative molecular mass; data contains keys of relative
|
6
7
|
Peptide = Struct.new(:missed_cleavages, :mr, :delta, :num_ions_matched, :seq, :peaks_from_ions_1, :var_mods_string, :ions_score, :ion_series_found, :peaks_from_ions_2, :peaks_from_ions_3, :query_num, :peptide_num, :proteins, :data) do
|
7
|
-
|
8
|
+
include Castable
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# if it reaches the end of the section or it is a blank line
|
17
|
-
def self.from_io(io, proteins=false, data=false)
|
18
|
-
finished = ->(line) { line.size < 2 || line[0,2] == '--' }
|
10
|
+
|
11
|
+
# reads the next line. If it contains valid query information returns
|
12
|
+
# an array [query_num, peptide_num, info_tag, value]. If it contains no valid
|
13
|
+
# query information, resets the io position to the beginning of the
|
14
|
+
# line and returns nil.
|
15
|
+
def self.next_qp_data(io)
|
16
|
+
before = io.pos
|
19
17
|
line = io.readline("\n")
|
20
|
-
if
|
18
|
+
if line[0,2] == '--'
|
19
|
+
io.pos = before
|
21
20
|
nil
|
22
21
|
else
|
23
|
-
|
24
|
-
(
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
line.chomp!
|
23
|
+
(qpstring, value) = line.split('=',2)
|
24
|
+
(qns, pns, info_tag) = qpstring.split('_', 3)
|
25
|
+
(qnum, pnum) = [qns, pns].map {|ns| ns[1..-1].to_i }
|
26
|
+
[qnum, pnum, info_tag, value]
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# given the value part of the initial peptide data (q1_p1=<value>),
|
31
|
+
# sets the object's properties. returns the pephit
|
32
|
+
def self.from_value_string(value, qnum, pnum)
|
33
|
+
(core, prots) = value.split(';', 2)
|
34
|
+
pephit = self.new(*core.split(','), qnum, pnum)
|
35
|
+
pephit.cast!
|
36
|
+
pephit
|
37
|
+
end
|
38
|
+
|
39
|
+
# returns the query num and peptide num and info_tag and string. nil if they don't exist.
|
40
|
+
def self.dissect_line(line)
|
41
|
+
if md=/q(\d+)_p_?(\d+)(\w*)=(.*)/.match(line)
|
42
|
+
[md[1].to_i, md[2].to_i, md[3], md[4]]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
# returns each peptide hit. Some queries will not have *any* hits,
|
48
|
+
# and these are *not* yielded.
|
49
|
+
def self.each(io, &block)
|
50
|
+
return to_enum(__method__, io) unless block
|
51
|
+
before = io.pos
|
52
|
+
peptide = nil
|
53
|
+
while reply=dissect_line(io.readline("\n"))
|
54
|
+
(qnum, pnum, info_tag, value) = reply
|
55
|
+
if info_tag == ''
|
56
|
+
track_pos = io.pos
|
57
|
+
block.call(peptide) if peptide # yield the previous peptide
|
58
|
+
io.pos = track_pos
|
59
|
+
peptide =
|
60
|
+
(value == "-1") ? nil : self.from_value_string(value, qnum, pnum)
|
61
|
+
else
|
62
|
+
# implement reading in future
|
48
63
|
end
|
49
|
-
|
64
|
+
before = io.pos
|
50
65
|
end
|
66
|
+
# yield that last peptide
|
67
|
+
|
68
|
+
track_pos = io.pos
|
69
|
+
block.call(peptide) if peptide
|
70
|
+
io.pos = track_pos
|
51
71
|
end
|
52
72
|
end
|
73
|
+
class Peptide
|
74
|
+
CAST = {
|
75
|
+
missed_cleavages: :to_i,
|
76
|
+
mr: :to_f,
|
77
|
+
delta: :to_f,
|
78
|
+
num_ions_matched: :to_i,
|
79
|
+
ions: :string,
|
80
|
+
ions_score: :to_f,
|
81
|
+
peaks_from_ions_2: :to_i,
|
82
|
+
peaks_from_ions_3: :to_i,
|
83
|
+
}
|
84
|
+
end
|
53
85
|
end
|
54
86
|
end
|
55
87
|
end
|
@@ -2,14 +2,17 @@ require 'ostruct'
|
|
2
2
|
require 'delegate'
|
3
3
|
require 'cgi'
|
4
4
|
|
5
|
+
require 'mspire/mascot/dat/cast'
|
6
|
+
|
5
7
|
module Mspire
|
6
8
|
module Mascot
|
7
9
|
class Dat
|
8
10
|
class Query < Hash
|
11
|
+
include Castable
|
9
12
|
|
10
13
|
CAST = {
|
11
|
-
charge:
|
12
|
-
title:
|
14
|
+
charge: Cast::FROM_CHARGE_STRING,
|
15
|
+
title: Cast::CGI_UNESCAPE,
|
13
16
|
mass_min: :to_f,
|
14
17
|
mass_max: :to_f,
|
15
18
|
int_min: :to_f,
|
@@ -17,27 +20,19 @@ module Mspire
|
|
17
20
|
num_vals: :to_i,
|
18
21
|
num_used1: :to_i,
|
19
22
|
index: :to_i,
|
20
|
-
Ions1:
|
23
|
+
Ions1: Cast::FLOAT_PAIRS,
|
21
24
|
}
|
22
25
|
|
23
26
|
# returns self
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
break if line[0,2] == '--'
|
28
|
-
line.chomp!
|
29
|
-
(key, val) = line.split('=')
|
30
|
-
query[key.to_sym] = val
|
31
|
-
end
|
32
|
-
query.each do |k,v|
|
33
|
-
if cast=CAST[k]
|
34
|
-
apply = cast.is_a?(Symbol) ? cast.to_proc : cast
|
35
|
-
query[k] = apply[v] if apply
|
36
|
-
end
|
27
|
+
def from_io!(io)
|
28
|
+
Dat.each_key_val(io) do |key,val|
|
29
|
+
self[key.to_sym] = val
|
37
30
|
end
|
38
|
-
|
31
|
+
cast!
|
39
32
|
end
|
40
33
|
|
34
|
+
def peaks() self[:Ions1] end
|
35
|
+
|
41
36
|
def method_missing(*args, &block)
|
42
37
|
if args[0].to_s[-1] == '='
|
43
38
|
if self.key?(args[0...-1])
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
module Mspire
|
3
|
+
module Mascot
|
4
|
+
class Dat
|
5
|
+
module Section
|
6
|
+
module KeyVal
|
7
|
+
def from_io!(io, as_symbols=true)
|
8
|
+
Dat.each_key_val(io) do |key,val|
|
9
|
+
self[ as_symbols ? key.to_sym : key ] = val
|
10
|
+
end
|
11
|
+
self.send(:cast!) if self.respond_to?(:cast!)
|
12
|
+
self
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -2,16 +2,18 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
require 'mspire/mascot/dat/index'
|
4
4
|
|
5
|
+
require 'fileutils'
|
6
|
+
|
5
7
|
describe 'Mspire::Mascot::Dat::Index being initialized from file' do
|
6
8
|
|
7
9
|
let(:io) { File.open(TESTFILES + "/F004128.dat") }
|
8
10
|
|
9
11
|
specify '#initialize(io) creates the index object' do
|
10
|
-
Mspire::Mascot::Dat::Index.new(io).should be_a(Mspire::Mascot::Dat::Index)
|
12
|
+
Mspire::Mascot::Dat::Index.new.from_io!(io).should be_a(Mspire::Mascot::Dat::Index)
|
11
13
|
end
|
12
14
|
|
13
15
|
describe Mspire::Mascot::Dat::Index do
|
14
|
-
subject { Mspire::Mascot::Dat::Index.new(io) }
|
16
|
+
subject { Mspire::Mascot::Dat::Index.new.from_io!(io) }
|
15
17
|
|
16
18
|
it 'can access the header start byte nums' do
|
17
19
|
|
@@ -40,5 +42,18 @@ describe 'Mspire::Mascot::Dat::Index being initialized from file' do
|
|
40
42
|
subject['peptides'].should == 41624
|
41
43
|
end
|
42
44
|
|
45
|
+
it 'can write the index info and create an identical object from the file' do
|
46
|
+
spec_tmpdir do |tmpdir|
|
47
|
+
bytefile = tmpdir + "/index_bytefile.tmp"
|
48
|
+
subject.write( bytefile )
|
49
|
+
File.exist?( bytefile ).should be_true
|
50
|
+
File.size( bytefile ).should be > 0
|
51
|
+
fromfile = Mspire::Mascot::Dat::Index.new.from_byteindex!( bytefile )
|
52
|
+
[:byte_num, :query_num_to_byte, :query_nums].each do |methd|
|
53
|
+
fromfile.send(methd).should == subject.send(methd)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
43
58
|
end
|
44
59
|
end
|
@@ -1,24 +1,59 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
+
require 'mspire/mascot/dat'
|
3
4
|
require 'mspire/mascot/dat/peptide'
|
4
5
|
|
5
|
-
describe 'reading off
|
6
|
+
describe 'reading off the peptides' do
|
6
7
|
|
7
|
-
before(:
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@io
|
8
|
+
before(:each) do
|
9
|
+
file = TESTFILES + '/F004129.dat'
|
10
|
+
@io = File.open(file)
|
11
|
+
@io.pos = 62743
|
12
|
+
end
|
13
|
+
|
14
|
+
after(:each) do
|
15
|
+
@io.close
|
15
16
|
end
|
16
17
|
|
17
|
-
it '
|
18
|
-
|
19
|
-
|
20
|
-
peptide.send(k).should == v
|
18
|
+
it 'has an iterator' do
|
19
|
+
info = Mspire::Mascot::Dat::Peptide.each(@io).map do |pep|
|
20
|
+
[pep.ions_score, pep.query_num, pep.peptide_num]
|
21
21
|
end
|
22
|
+
info.should == [[0.22, 1, 1], [4.11, 2, 1], [2.84, 2, 2], [2.83, 2, 3], [2.65, 2, 4], [2.28, 2, 5], [1.07, 2, 6], [0.99, 2, 7], [0.96, 2, 8], [0.65, 2, 9], [0.63, 2, 10]]
|
22
23
|
end
|
23
24
|
|
24
25
|
end
|
26
|
+
|
27
|
+
=begin
|
28
|
+
q1_p1=0,2113.999008,-0.998912,4,VMLSDADPSLEQYYVNVR,17,00100000000000000000,0.22,0002002000000000000,0,0;"Q23985":0:79:96:1
|
29
|
+
q1_p1_terms=R,T
|
30
|
+
q1_p1_primary_nl=00100000000000000000
|
31
|
+
q2_p1=0,2978.269196,1.195840,5,MDSSSGSQGNGSFMDQNSLGILNMDNLK,17,000000000000001000000000100000,4.11,0002000020000000000,0,0;"Q9VV79":0:1:28:1
|
32
|
+
q2_p1_terms=-,V
|
33
|
+
q2_p1_primary_nl=000000000000002000000000200000
|
34
|
+
q2_p2=1,2979.449478,0.015558,5,STGAESSEEXLREAYIMASVEHVNLLK,45,00000000000000000100000000000,2.84,0000000020000000000,0,0;"Q6SAG3":0:875:901:1
|
35
|
+
q2_p2_terms=K,L
|
36
|
+
q2_p2_primary_nl=00000000000000000200000000000
|
37
|
+
q2_p2_subst=10,X,T
|
38
|
+
q2_p3=1,2980.525406,-1.060370,5,LSSPPSTSHTYEGKLLTKPTHTNTDLR,45,00000000000000000000000000000,2.83,0000000020000000000,0,0;"Q6KEU5":0:82:108:1,"Q6KEU6":0:82:108:1
|
39
|
+
q2_p3_terms=K,G:K,G
|
40
|
+
q2_p4=0,2978.269196,1.195840,4,MDSSSGSQGNGSFMDQNSLGILNMDNLK,31,010000000000000000000000100000,2.65,0000000020000000000,0,0;"Q9VV79":0:1:28:1
|
41
|
+
q2_p4_terms=-,V
|
42
|
+
q2_p4_primary_nl=020000000000000000000000200000
|
43
|
+
q2_p5=1,2980.380035,-0.914999,4,FGDMFSKESEQVALAVYEAYDPNVGSK,17,00000000000000000000000000000,2.28,0000002020000000000,0,0;"P82982":0:340:366:1
|
44
|
+
q2_p5_terms=K,S
|
45
|
+
q2_p6=0,2978.688492,0.776544,8,GAEFSSFSVVLLVIILIIVFLSNAYHK,118,00000000000000000000000000000,1.07,0000020000000000000,0,0;"A8DYF1":0:2:28:1
|
46
|
+
q2_p6_terms=M,A
|
47
|
+
q2_p7=1,2979.588989,-0.123953,4,KLQSNATVLSDGYAAHLAGLQAVGGSRPAK,45,00000000000000000000000000000000,0.99,0000020010000000000,0,0;"P43125":0:1187:1216:1,"P43125-2":0:1187:1216:1,"D2NUF3":0:172:201:1
|
48
|
+
q2_p7_terms=K,G:K,G:K,G
|
49
|
+
q2_p8=0,2980.229141,-0.764105,3,LMFGDEEGNLPSLDQEDEQVPETEED,31,0010000000000000000000000000,0.96,0002000000000000000,0,0;"Q9VVZ8":0:703:728:1
|
50
|
+
q2_p8_terms=R,-
|
51
|
+
q2_p8_primary_nl=0020000000000000000000000000
|
52
|
+
q2_p9=1,2978.364624,1.100412,2,LCPRCFQELSDYDTIMVNLMTTQK,17,00000000000000000000100000,0.65,0000002000000000000,0,0;"P07664":0:60:83:1
|
53
|
+
q2_p9_terms=R,R
|
54
|
+
q2_p9_primary_nl=00000000000000000000100000
|
55
|
+
q2_p10=0,2978.672592,0.792444,5,NGSSVAGTSVLSPSIPLTLVVLPALMIAQK,70,00000000000000000000000000100000,0.63,0000000020000000000,0,0;"B8A406":0:72:101:1,"C6SUW4":0:177:206:1,"A1Z9D9":0:246:275:1
|
56
|
+
q2_p10_terms=K,S:K,S:K,S
|
57
|
+
q2_p10_primary_nl=00000000000000000000000000100000
|
58
|
+
=end
|
59
|
+
|
@@ -24,8 +24,8 @@ END
|
|
24
24
|
@io = StringIO.new(data)
|
25
25
|
end
|
26
26
|
|
27
|
-
|
28
|
-
query = Mspire::Mascot::Dat::Query.from_io(@io)
|
27
|
+
specify '#from_io(io) returns a query object with appropriate casts' do
|
28
|
+
query = Mspire::Mascot::Dat::Query.new.from_io!(@io)
|
29
29
|
query.title.should == '1.2746.2746.2'
|
30
30
|
query.charge.should == -2
|
31
31
|
end
|
@@ -23,72 +23,172 @@ describe 'reading a dat file' do
|
|
23
23
|
@io.close
|
24
24
|
end
|
25
25
|
|
26
|
-
|
27
|
-
|
26
|
+
specify '#sections() returns all the sections (with queries considered a single group)' do
|
27
|
+
(sections=@dat.sections).should == [:parameters, :masses, :unimod, :enzyme, :header, :summary, :decoy_summary, :peptides, :decoy_peptides, :proteins, :index, :queries]
|
28
|
+
@dat.keys.should == sections
|
28
29
|
end
|
29
30
|
|
30
|
-
|
31
|
+
specify '#query(n) can retrieve queries at random' do
|
31
32
|
@dat.query(1).title.should == '1.2746.2746.2'
|
32
33
|
@dat.query(2).title.should == '1.2745.2745.4'
|
33
34
|
end
|
34
35
|
|
35
|
-
|
36
|
-
start = [ [1,1,'VMLSDADPSLEQYYVNVR'],
|
37
|
-
[2,1,'MDSSSGSQGNGSFMDQNSLGILNMDNLK'],
|
38
|
-
[2,2,'STGAESSEEXLREAYIMASVEHVNLLK'],
|
39
|
-
[2,3,'LSSPPSTSHTYEGKLLTKPTHTNTDLR'],
|
40
|
-
[2,4,'MDSSSGSQGNGSFMDQNSLGILNMDNLK']]
|
36
|
+
describe '#Dat[:<name>]' do
|
41
37
|
|
42
|
-
|
38
|
+
specify "#[:parameters] returns a hash-like object with proper casts" do
|
39
|
+
params = @dat[:parameters]
|
40
|
+
params.should be_a(Mspire::Mascot::Dat::Parameters)
|
41
|
+
params['LICENSE'].should == 'Licensed to: Brigham Young University, Provo, United States RCCZ-D4GH-S53W-2G5F-NG5L, (1 processor).'
|
42
|
+
params['IATOL'].should be_nil
|
43
|
+
params.key?('IATOL').should be_true
|
44
|
+
params.key?('silliness').should be_false
|
45
|
+
params['IT_MODS'].should == 'Oxidation (M)'
|
46
|
+
params['TOL'].should == 1.2
|
47
|
+
params['CHARGE'].should == 2
|
48
|
+
params['INTERNALS'].should == "0.0,700.0"
|
49
|
+
end
|
43
50
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
peptide.seq.should == aa
|
52
|
-
end
|
51
|
+
specify "#[:header] returns hash-like object with casts" do
|
52
|
+
header = @dat[:header]
|
53
|
+
header.should be_a(Mspire::Mascot::Dat::Header)
|
54
|
+
header[:sequences].should == 34724
|
55
|
+
header[:residues].should == 17622530
|
56
|
+
header[:distribution].should == [30914, 38, 61, 154, 203, 295, 417, 447, 500, 442, 360, 239, 168, 167, 98, 60, 39, 24, 15, 16, 14, 8, 7, 8, 5, 4, 7, 3, 3, 1, 1, 3, 1, 0, 1, 0, 1]
|
57
|
+
header[:release].should == 'GbetaCCT_drome.fasta'
|
53
58
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# this proves that each_peptide can also return an enumerator if asked
|
61
|
-
cnts = @dat.each_peptide.with_index.map do |peptide,i|
|
62
|
-
peptide.should(be_a(Mspire::Mascot::Dat::Peptide)) && i
|
59
|
+
|
60
|
+
specify '#[:masses] returns key val pairs (uncast)' do
|
61
|
+
masses = @dat[:masses]
|
62
|
+
masses.should be_an(Mspire::Mascot::Dat::Masses)
|
63
|
+
masses['A'].should == '71.037114'
|
64
|
+
masses['FixedModResidues1'].should == 'C'
|
63
65
|
end
|
64
|
-
cnts.should == (0..10).to_a
|
65
|
-
end
|
66
66
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
67
|
+
specify '#[:unimod] returns as a string the entire section' do
|
68
|
+
unimod_string = @dat[:unimod]
|
69
|
+
lines = unimod_string.each_line.to_a
|
70
|
+
lines.first.chomp.should == '<?xml version="1.0" encoding="UTF-8" ?>'
|
71
|
+
lines[-2].chomp.should == '</umod:unimod>'
|
72
72
|
end
|
73
|
-
ions_score_target.should == [0.22, 4.11]
|
74
73
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
74
|
+
specify '#[:enzyme] returns as a string the entire section' do
|
75
|
+
enzyme_string = @dat[:enzyme]
|
76
|
+
lines = enzyme_string.each_line.to_a
|
77
|
+
lines.first.chomp.should == 'Title:Trypsin'
|
78
|
+
lines.last.chomp.should == '*'
|
79
79
|
end
|
80
|
-
ions_score_decoy.should == [3.52, 4.58]
|
81
80
|
end
|
82
81
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
82
|
+
describe 'iterators' do
|
83
|
+
|
84
|
+
describe 'each_<name>' do
|
85
|
+
|
86
|
+
specify '#each_query retrieves every query' do
|
87
|
+
queries = @dat.each_query.to_a
|
88
|
+
queries.size.should == 2
|
89
|
+
queries.first.title.should == '1.2746.2746.2'
|
90
|
+
queries.last.title.should == '1.2745.2745.4'
|
91
|
+
end
|
92
|
+
|
93
|
+
specify '#each_peptide can retrieve every peptide' do
|
94
|
+
start = [ [1,1,'VMLSDADPSLEQYYVNVR'],
|
95
|
+
[2,1,'MDSSSGSQGNGSFMDQNSLGILNMDNLK'],
|
96
|
+
[2,2,'STGAESSEEXLREAYIMASVEHVNLLK'],
|
97
|
+
[2,3,'LSSPPSTSHTYEGKLLTKPTHTNTDLR'],
|
98
|
+
[2,4,'MDSSSGSQGNGSFMDQNSLGILNMDNLK']]
|
99
|
+
|
100
|
+
last = [2,10,'NGSSVAGTSVLSPSIPLTLVVLPALMIAQK']
|
101
|
+
|
102
|
+
last_pep = nil
|
103
|
+
@dat.each_peptide do |peptide|
|
104
|
+
last_pep = peptide
|
105
|
+
(qnum, pnum, aa) = start.shift
|
106
|
+
if qnum
|
107
|
+
peptide.query_num.should == qnum
|
108
|
+
peptide.peptide_num.should == pnum
|
109
|
+
peptide.seq.should == aa
|
110
|
+
end
|
111
|
+
end
|
112
|
+
(qnum, pnum, aa) = last
|
113
|
+
peptide = last_pep
|
114
|
+
peptide.query_num.should == qnum
|
115
|
+
peptide.peptide_num.should == pnum
|
116
|
+
peptide.seq.should == aa
|
117
|
+
|
118
|
+
# this proves that each_peptide can also return an enumerator if asked
|
119
|
+
cnts = @dat.each_peptide.with_index.map do |peptide,i|
|
120
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide)) && i
|
121
|
+
end
|
122
|
+
cnts.should == (0..10).to_a
|
123
|
+
end
|
124
|
+
|
125
|
+
specify '#each_peptide(true/false) can retrieve normal/decoy peptides' do
|
126
|
+
ions_score_target = @dat.each_peptide(true, 1).map do |peptide|
|
127
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
128
|
+
peptide.peptide_num.should == 1
|
129
|
+
peptide.ions_score
|
130
|
+
end
|
131
|
+
ions_score_target.should == [0.22, 4.11]
|
132
|
+
|
133
|
+
[:to_a, :reverse].each do |ar_order|
|
134
|
+
args = [1, false].send(ar_order)
|
135
|
+
ions_score_decoy = @dat.each_peptide(*args).map do |peptide|
|
136
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
137
|
+
peptide.peptide_num.should == 1
|
138
|
+
peptide.ions_score
|
139
|
+
end
|
140
|
+
ions_score_decoy.should == [3.52, 4.58]
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
specify '#each_peptide(n) can retrieve just the top n peptides' do
|
145
|
+
n = 1
|
146
|
+
cnt = 0
|
147
|
+
@dat.each_peptide(n) do |peptide|
|
148
|
+
cnt += 1
|
149
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
150
|
+
peptide.query_num.should == cnt
|
151
|
+
peptide.peptide_num.should == 1
|
152
|
+
end
|
153
|
+
end
|
91
154
|
end
|
92
|
-
end
|
93
155
|
|
156
|
+
describe '#[:<name>] iterators' do
|
157
|
+
|
158
|
+
specify '#[:peptides] returns an enumerator' do
|
159
|
+
@dat[:peptides].should be_an(Enumerator)
|
160
|
+
@dat[:peptides].map(&:peptide_num).should == [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
161
|
+
@dat[:peptides].map(&:query_num).should == [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
162
|
+
@dat[:peptides].map(&:ions_score).should == [0.22, 4.11, 2.84, 2.83, 2.65, 2.28, 1.07, 0.99, 0.96, 0.65, 0.63]
|
163
|
+
@dat[:peptides, true, 1].map(&:peptide_num).should == [1,1]
|
164
|
+
end
|
165
|
+
|
166
|
+
specify '#[:decoy_peptides] returns an enumerator (or takes a block)' do
|
167
|
+
@dat[:decoy_peptides].should be_an(Enumerator)
|
168
|
+
@dat[:decoy_peptides].map(&:peptide_num).should == [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
169
|
+
@dat[:decoy_peptides].map(&:query_num).should == [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
170
|
+
@dat[:decoy_peptides].map(&:ions_score).should == [3.52, 4.58, 3.46, 3.3, 3.05, 3.05, 2.99, 2.97, 2.97, 2.87, 2.87]
|
171
|
+
@dat[:decoy_peptides, 1].map(&:peptide_num).should == [1,1]
|
172
|
+
end
|
173
|
+
|
174
|
+
specify '#[:queries] returns an enumerator (or takes a block)' do
|
175
|
+
@dat[:queries].should be_an(Enumerator)
|
176
|
+
@dat[:queries].map(&:title).should == ["1.2746.2746.2", "1.2745.2745.4"]
|
177
|
+
end
|
178
|
+
|
179
|
+
specify '#[:proteins] returns an enumerator (or takes a block)' do
|
180
|
+
data = [
|
181
|
+
["Q9VV79", 125605.17, "BcDNA.LD24702 OS=Drosophila melanogaster GN=spd-2 PE=1 SV=2"],
|
182
|
+
["Q23985", 82989.73, "Protein deltex OS=Drosophila melanogaster GN=dx PE=1 SV=2"]
|
183
|
+
]
|
184
|
+
|
185
|
+
@dat[:proteins].each do |protein|
|
186
|
+
exp = data.shift
|
187
|
+
protein.accession.should == exp.shift
|
188
|
+
protein.mw.should == exp.shift
|
189
|
+
protein.description.should == exp.shift
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
94
194
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,11 +1,27 @@
|
|
1
1
|
require 'rspec'
|
2
2
|
|
3
|
+
require 'fileutils'
|
4
|
+
|
3
5
|
# Requires supporting files with custom matchers and macros, etc,
|
4
6
|
# in ./support/ and its subdirectories.
|
5
7
|
#Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
6
8
|
|
9
|
+
SPEC_DIR = File.dirname(__FILE__)
|
10
|
+
|
7
11
|
RSpec.configure do |config|
|
8
12
|
config.treat_symbols_as_metadata_keys_with_true_values = true
|
13
|
+
config.formatter = :documentation
|
9
14
|
end
|
10
15
|
|
11
|
-
TESTFILES =
|
16
|
+
TESTFILES = SPEC_DIR + "/testfiles"
|
17
|
+
|
18
|
+
# creates a tmpdir, passes it into the block as a full path, then destroys at
|
19
|
+
# close of block. Returns whatever was returned by the block.
|
20
|
+
def spec_tmpdir(&block)
|
21
|
+
dir = File.expand_path(SPEC_DIR + "/tmp")
|
22
|
+
FileUtils.rm_rf( dir )
|
23
|
+
FileUtils.mkdir( dir )
|
24
|
+
reply = block.call(dir)
|
25
|
+
FileUtils.rm_rf( dir )
|
26
|
+
reply
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire-mascot-dat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,24 +9,8 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-28 00:00:00.000000000 Z
|
12
|
+
date: 2013-04-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: elif
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 0.1.0
|
22
|
-
type: :runtime
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 0.1.0
|
30
14
|
- !ruby/object:Gem::Dependency
|
31
15
|
name: rspec
|
32
16
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,10 +74,15 @@ files:
|
|
90
74
|
- Rakefile
|
91
75
|
- VERSION
|
92
76
|
- lib/mspire/mascot/dat.rb
|
77
|
+
- lib/mspire/mascot/dat/cast.rb
|
78
|
+
- lib/mspire/mascot/dat/header.rb
|
93
79
|
- lib/mspire/mascot/dat/index.rb
|
80
|
+
- lib/mspire/mascot/dat/masses.rb
|
81
|
+
- lib/mspire/mascot/dat/parameters.rb
|
94
82
|
- lib/mspire/mascot/dat/peptide.rb
|
83
|
+
- lib/mspire/mascot/dat/protein.rb
|
95
84
|
- lib/mspire/mascot/dat/query.rb
|
96
|
-
- mspire-mascot-dat.gemspec
|
85
|
+
- lib/mspire/mascot/dat/section/key_val.rb
|
97
86
|
- spec/mspire/mascot/dat/index_spec.rb
|
98
87
|
- spec/mspire/mascot/dat/peptide_spec.rb
|
99
88
|
- spec/mspire/mascot/dat/query_spec.rb
|
data/mspire-mascot-dat.gemspec
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = "mspire-mascot-dat"
|
8
|
-
s.version = "0.0.1"
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["John T. Prince"]
|
12
|
-
s.date = "2013-03-28"
|
13
|
-
s.description = "Reads mascot dat files with gusto for mspire library."
|
14
|
-
s.email = "jtprince@gmail.com"
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE.txt",
|
17
|
-
"README.md"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".rspec",
|
22
|
-
"LICENSE.txt",
|
23
|
-
"README.md",
|
24
|
-
"Rakefile",
|
25
|
-
"VERSION",
|
26
|
-
"lib/mspire/mascot/dat.rb",
|
27
|
-
"lib/mspire/mascot/dat/index.rb",
|
28
|
-
"lib/mspire/mascot/dat/peptide.rb",
|
29
|
-
"lib/mspire/mascot/dat/query.rb",
|
30
|
-
"mspire-mascot-dat.gemspec",
|
31
|
-
"spec/mspire/mascot/dat/index_spec.rb",
|
32
|
-
"spec/mspire/mascot/dat/peptide_spec.rb",
|
33
|
-
"spec/mspire/mascot/dat/query_spec.rb",
|
34
|
-
"spec/mspire/mascot/dat_spec.rb",
|
35
|
-
"spec/reference/dat_format_reference.md",
|
36
|
-
"spec/reference/two_spectra_decoy_F004129.png",
|
37
|
-
"spec/reference/two_spectra_no_decoy_F004128.png",
|
38
|
-
"spec/spec_helper.rb",
|
39
|
-
"spec/testfiles/F004128.dat",
|
40
|
-
"spec/testfiles/F004129.dat",
|
41
|
-
"spec/testfiles/two_spectra.mgf"
|
42
|
-
]
|
43
|
-
s.homepage = "http://github.com/princelab/mspire-mascot-dat"
|
44
|
-
s.licenses = ["MIT"]
|
45
|
-
s.require_paths = ["lib"]
|
46
|
-
s.rubygems_version = "1.8.23"
|
47
|
-
s.summary = "Reads mascot dat files for mspire library."
|
48
|
-
|
49
|
-
if s.respond_to? :specification_version then
|
50
|
-
s.specification_version = 3
|
51
|
-
|
52
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
|
-
s.add_runtime_dependency(%q<elif>, ["~> 0.1.0"])
|
54
|
-
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
55
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
56
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
57
|
-
else
|
58
|
-
s.add_dependency(%q<elif>, ["~> 0.1.0"])
|
59
|
-
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
60
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
61
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
62
|
-
end
|
63
|
-
else
|
64
|
-
s.add_dependency(%q<elif>, ["~> 0.1.0"])
|
65
|
-
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
66
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
67
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|