mspire-mascot-dat 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +90 -15
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/mspire/mascot/dat.rb +181 -14
- data/lib/mspire/mascot/dat/cast.rb +31 -0
- data/lib/mspire/mascot/dat/header.rb +26 -0
- data/lib/mspire/mascot/dat/index.rb +42 -7
- data/lib/mspire/mascot/dat/masses.rb +22 -0
- data/lib/mspire/mascot/dat/parameters.rb +21 -0
- data/lib/mspire/mascot/dat/peptide.rb +70 -38
- data/lib/mspire/mascot/dat/protein.rb +8 -0
- data/lib/mspire/mascot/dat/query.rb +12 -17
- data/lib/mspire/mascot/dat/section/key_val.rb +18 -0
- data/spec/mspire/mascot/dat/index_spec.rb +17 -2
- data/spec/mspire/mascot/dat/peptide_spec.rb +48 -13
- data/spec/mspire/mascot/dat/query_spec.rb +2 -2
- data/spec/mspire/mascot/dat_spec.rb +150 -50
- data/spec/spec_helper.rb +17 -1
- metadata +8 -19
- data/mspire-mascot-dat.gemspec +0 -70
data/README.md
CHANGED
|
@@ -1,45 +1,120 @@
|
|
|
1
1
|
# mspire-mascot-dat
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Access mascot search engine .dat results file.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
* Simple interface
|
|
6
|
+
* Lazy reading from IO object
|
|
7
|
+
* Object access of key data types
|
|
8
|
+
* Data casts where appropriate
|
|
9
|
+
|
|
10
|
+
Pull requests (or requests for features) gladly accepted.
|
|
11
|
+
|
|
12
|
+
[API of latest version](http://rubydoc.info/gems/mspire-mascot-dat)
|
|
13
|
+
|
|
14
|
+
## Synopsis
|
|
15
|
+
|
|
16
|
+
A Dat object reads information off an open IO object as lazily as possible.
|
|
17
|
+
The sections can be accessed like a hash.
|
|
6
18
|
|
|
7
19
|
```ruby
|
|
8
20
|
require 'mspire-mascot-dat'
|
|
9
21
|
|
|
10
22
|
Mspire::Mascot::Dat.open("file.dat") do |dat|
|
|
11
|
-
dat.
|
|
23
|
+
dat.keys # (or dat.sections) => [:parameters, :masses, ...]
|
|
24
|
+
|
|
25
|
+
dat[:peptides].each do |peptide|
|
|
26
|
+
# or: dat.each_peptide {|peptide| ... }
|
|
27
|
+
# data is properly cast
|
|
28
|
+
peptide.delta # => a Float
|
|
29
|
+
peptide.missed_cleavages # => an Integer
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
dat[:queries].each do |query|
|
|
33
|
+
query.title # => a String (unescaped)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
dat[:proteins].each do |protein|
|
|
37
|
+
protein.accession
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# or random query access
|
|
41
|
+
dat.query(22) # returns query #22
|
|
42
|
+
|
|
43
|
+
# sections with uppercase params are typically accessed by string
|
|
44
|
+
params = dat[:parameters]
|
|
45
|
+
params['CHARGE'] # => an Integer
|
|
46
|
+
|
|
47
|
+
# sections with lowercase params are accessed by symbol
|
|
48
|
+
header = dat[:header]
|
|
49
|
+
header[:sequences] # => an Integer
|
|
50
|
+
|
|
51
|
+
# sections that aren't normal key/value pairs returned as a String
|
|
52
|
+
dat[:unimod] # => a String containing lots of XML
|
|
53
|
+
dat[:enzyme] # => a String with enzyme data
|
|
12
54
|
end
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Note that no support is given for accessing the 'summary' sections because they are often incomplete for large files anyway and the information can all be found by accessing the
|
|
13
58
|
|
|
59
|
+
### Enumerable information
|
|
60
|
+
|
|
61
|
+
Sections with enumerable objects may be accessed as each_<whatever> or with
|
|
62
|
+
Dat#[], which returns an enumerable. So, these are equivalent:
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
dat.each_peptide {|pep| ... }
|
|
66
|
+
dat[:peptides].each {|pep| ... }
|
|
67
|
+
|
|
68
|
+
# these also are equivalent (return an enumerator)
|
|
69
|
+
enumerator = dat.each_peptide
|
|
70
|
+
enumerator = dat[:peptides]
|
|
14
71
|
```
|
|
15
|
-
### each peptide
|
|
16
72
|
|
|
17
|
-
|
|
73
|
+
Enumerators for some objects will have additional parameters that may be passed in (to either method style). For instance, the user may retrieve the top **n** peptide hits:
|
|
18
74
|
|
|
19
75
|
```ruby
|
|
20
|
-
dat.each_peptide
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
76
|
+
dat.each_peptide(1) {|peptide| ... } # only top peptide hits
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Queries
|
|
80
|
+
|
|
81
|
+
In a dat file, each query is its own section, but this makes them fairly
|
|
82
|
+
awkward to access. We treat them as if they were grouped into a single
|
|
83
|
+
section.
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
dat[:queries].each do |query|
|
|
87
|
+
# hash or method access
|
|
88
|
+
query[:charge] # => a positive or negative Integer
|
|
89
|
+
query.charge
|
|
90
|
+
query.Ions1 # or query.peaks
|
|
24
91
|
end
|
|
25
92
|
```
|
|
26
93
|
|
|
27
|
-
|
|
94
|
+
But they can also be accessed by query number:
|
|
28
95
|
|
|
29
96
|
```ruby
|
|
30
|
-
dat.
|
|
97
|
+
dat.query(23) # return query23
|
|
31
98
|
```
|
|
32
99
|
|
|
33
|
-
|
|
100
|
+
### Decoys
|
|
101
|
+
|
|
102
|
+
Decoy peptides may be accessed a few different ways, all of which are equivalent:
|
|
34
103
|
|
|
35
104
|
```ruby
|
|
36
|
-
dat.each_peptide(
|
|
105
|
+
dat.each_peptide(false) {|peptide| ... }
|
|
106
|
+
dat[:peptides, false].each {|peptide| ... }
|
|
107
|
+
dat.each_decoy_peptide {|peptide| ... }
|
|
108
|
+
dat[:decoy_peptides].each {|peptide| ... }
|
|
37
109
|
```
|
|
38
110
|
|
|
39
111
|
## Further Info
|
|
40
112
|
|
|
41
|
-
See
|
|
42
|
-
|
|
113
|
+
See the specs for additional examples.
|
|
114
|
+
|
|
115
|
+
Also, see Mascot's "Installation & Setup Manual" for detailed information
|
|
116
|
+
about the .dat format itself (can be accessed from the mascot main page
|
|
117
|
+
of whichever mascot you are using).
|
|
43
118
|
|
|
44
119
|
## Copyright
|
|
45
120
|
|
data/Rakefile
CHANGED
|
@@ -13,7 +13,6 @@ Jeweler::Tasks.new do |gem|
|
|
|
13
13
|
gem.description = %Q{Reads mascot dat files with gusto for mspire library.}
|
|
14
14
|
gem.email = "jtprince@gmail.com"
|
|
15
15
|
gem.authors = ["John T. Prince"]
|
|
16
|
-
gem.add_dependency "elif", "~> 0.1.0"
|
|
17
16
|
gem.add_development_dependency "rspec", "~> 2.8.0"
|
|
18
17
|
gem.add_development_dependency "rdoc", "~> 3.12"
|
|
19
18
|
gem.add_development_dependency "jeweler", "~> 1.8.4"
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0
|
|
1
|
+
0.1.0
|
data/lib/mspire/mascot/dat.rb
CHANGED
|
@@ -1,25 +1,169 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
|
|
2
|
+
%w(
|
|
3
|
+
index
|
|
4
|
+
peptide
|
|
5
|
+
query
|
|
6
|
+
protein
|
|
7
|
+
parameters
|
|
8
|
+
header masses
|
|
9
|
+
).each do |subsection|
|
|
10
|
+
require "mspire/mascot/dat/#{subsection}"
|
|
11
|
+
end
|
|
4
12
|
|
|
5
13
|
module Mspire
|
|
6
14
|
module Mascot
|
|
7
15
|
class Dat
|
|
16
|
+
|
|
17
|
+
class << self
|
|
18
|
+
|
|
19
|
+
# reads each line from a section until reaching the end of the section
|
|
20
|
+
def each_line(io, &block)
|
|
21
|
+
return to_enum(__method__, io) unless block
|
|
22
|
+
io.each_line do |line|
|
|
23
|
+
break if line[0,2] == '--'
|
|
24
|
+
block.call(line)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# returns the key and value for KEY=VAL sections
|
|
29
|
+
def each_key_val(io, &block)
|
|
30
|
+
return to_enum(__method__, io) unless block
|
|
31
|
+
each_line(io) do |line|
|
|
32
|
+
line.chomp!
|
|
33
|
+
(key, val) = line.split('=',2)
|
|
34
|
+
block.call( [key, (val=='' ? nil : val)] )
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def string(io, &block)
|
|
39
|
+
each_line(io).to_a.join
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# returns the string after stripping off leading and trailing double
|
|
43
|
+
# quotation marks
|
|
44
|
+
def strip_quotes(string)
|
|
45
|
+
string.gsub(/\A"|"\Z/, '')
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def open(file, index_file=false, &block)
|
|
50
|
+
io = File.open(file)
|
|
51
|
+
response = block.call(self.new(io, index_file))
|
|
52
|
+
io.close
|
|
53
|
+
response
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
|
|
8
59
|
# the io object which is the open dat file
|
|
9
60
|
attr_accessor :io
|
|
10
61
|
|
|
11
62
|
# the index object which points to the start byte for each section
|
|
12
63
|
attr_accessor :index
|
|
13
64
|
|
|
14
|
-
|
|
65
|
+
# if index_file is true, will attempt to use a written index file
|
|
66
|
+
# based on naming conventions; if one doesn't yet exist it will create
|
|
67
|
+
# one for the next usage. If handed a String, will consider it the
|
|
68
|
+
# index filename for reading or writing depending on whether it exists.
|
|
69
|
+
def initialize(io, index_file=false)
|
|
15
70
|
@io = io
|
|
16
|
-
|
|
71
|
+
index_filename =
|
|
72
|
+
case index_file
|
|
73
|
+
when String then index_file
|
|
74
|
+
when TrueClass then Dat::Index.index_filename(io.path)
|
|
75
|
+
else
|
|
76
|
+
nil
|
|
77
|
+
end
|
|
78
|
+
@index = Index.new
|
|
79
|
+
if index_filename && File.exist?(index_filename)
|
|
80
|
+
@index.from_byteindex!(index_filename)
|
|
81
|
+
else
|
|
82
|
+
@index.from_io!(@io)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
if index_filename && !File.exist?(index_filename)
|
|
86
|
+
@index.write(index_filename)
|
|
87
|
+
end
|
|
17
88
|
end
|
|
18
89
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
90
|
+
# the universal way to access information
|
|
91
|
+
# returns the section with appropriate cast (if available) or as a
|
|
92
|
+
# String object with the information. nil if it doesn't exist. Also
|
|
93
|
+
# responds to :queries by calling #each_query. An enumerator is returned
|
|
94
|
+
# for enumerable objects.
|
|
95
|
+
#
|
|
96
|
+
# dat.section(:header) # => a Dat::Header object (hash-like)
|
|
97
|
+
# dat.section(:peptides) # => an Enumerator for peptides
|
|
98
|
+
# dat.section(:peptides, 1) # => an Enumerator for top peptides
|
|
99
|
+
# dat[:peptides, 1].each {|peptide| ... <top peptide> }
|
|
100
|
+
# # the equivalent each_<whatever> method:
|
|
101
|
+
# dat.each_peptide(1) {|peptide| ... <top peptide> }
|
|
102
|
+
#
|
|
103
|
+
# # aliased with #[] for bracket access:
|
|
104
|
+
# dat[:header]
|
|
105
|
+
# dat[:peptides, 1]
|
|
106
|
+
# ...
|
|
107
|
+
#
|
|
108
|
+
def section(*args)
|
|
109
|
+
# If the name exists as a class, then try to call the from_io method
|
|
110
|
+
# on the class (e.g., Parameters.from_io(io)). If the name is a
|
|
111
|
+
# plural, try the singular and the ::each method on the singular class
|
|
112
|
+
# (e.g., Peptide::each).
|
|
113
|
+
name = args.first.to_sym
|
|
114
|
+
capitalized = name.to_s.capitalize
|
|
115
|
+
maybe_singular =
|
|
116
|
+
case capitalized
|
|
117
|
+
when 'Queries'
|
|
118
|
+
'query'
|
|
119
|
+
else
|
|
120
|
+
start_section!(name)
|
|
121
|
+
capitalized[0...-1]
|
|
122
|
+
end
|
|
123
|
+
maybe_iterator = "each_#{maybe_singular.downcase}".to_sym
|
|
124
|
+
if self.respond_to?(maybe_iterator)
|
|
125
|
+
self.send(maybe_iterator, *args[1..-1])
|
|
126
|
+
elsif Mspire::Mascot::Dat.const_defined?(capitalized)
|
|
127
|
+
klass = Mspire::Mascot::Dat.const_get(capitalized)
|
|
128
|
+
obj = klass.new
|
|
129
|
+
if obj.respond_to?(:from_io!)
|
|
130
|
+
case name
|
|
131
|
+
when :parameters, :masses
|
|
132
|
+
obj.send(:from_io!, @io, false)
|
|
133
|
+
else
|
|
134
|
+
obj.send(:from_io!, @io)
|
|
135
|
+
end
|
|
136
|
+
else
|
|
137
|
+
nil
|
|
138
|
+
end
|
|
139
|
+
#elsif Mspire::Mascot::Dat.const_defined?(maybe_singular)
|
|
140
|
+
# klass = Mspire::Mascot::Dat.const_get(maybe_singular)
|
|
141
|
+
# klass.send(:each, @io, &block)
|
|
142
|
+
elsif @index.byte_num.key?(name)
|
|
143
|
+
Mspire::Mascot::Dat.string(@io)
|
|
144
|
+
else
|
|
145
|
+
nil
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
alias_method :[], :section
|
|
150
|
+
|
|
151
|
+
def each_protein(&block)
|
|
152
|
+
return to_enum(__method__) unless block
|
|
153
|
+
start_section!(:proteins)
|
|
154
|
+
Dat.each_key_val(@io) do |key, val|
|
|
155
|
+
(mw_s, desc) = val.split(',', 2)
|
|
156
|
+
block.call(Dat::Protein.new(Dat.strip_quotes(key), mw_s.to_f, Dat.strip_quotes(desc)))
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def each_query(&block)
|
|
161
|
+
return to_enum(__method__) unless block
|
|
162
|
+
@index.query_nums.each do |query_num|
|
|
163
|
+
byte = @index.query_num_to_byte[query_num]
|
|
164
|
+
@io.pos = byte
|
|
165
|
+
block.call( Mspire::Mascot::Dat::Query.new.from_io!(@io) )
|
|
166
|
+
end
|
|
23
167
|
end
|
|
24
168
|
|
|
25
169
|
# positions io at the beginning of the section data (past the Content
|
|
@@ -30,19 +174,40 @@ module Mspire
|
|
|
30
174
|
self
|
|
31
175
|
end
|
|
32
176
|
|
|
177
|
+
# returns query number n (these are NOT zero indexed)
|
|
33
178
|
def query(n)
|
|
34
179
|
start_section!(n)
|
|
35
|
-
Query.from_io(@io)
|
|
180
|
+
Query.new.from_io!(@io)
|
|
36
181
|
end
|
|
37
182
|
|
|
38
|
-
|
|
39
|
-
|
|
183
|
+
# optional parameters, passed in any order:
|
|
184
|
+
#
|
|
185
|
+
# top_n: [Float::INFINITY] a Numeric (top N hits)
|
|
186
|
+
# non_decoy: [true] a Boolean
|
|
187
|
+
#
|
|
188
|
+
# Returns the top_n hits. If non_decoy is false or nil, returns the
|
|
189
|
+
# decoy hits.
|
|
190
|
+
#
|
|
191
|
+
# each_peptide(false, 1) # top decoy peptide hit
|
|
192
|
+
# each_peptide(2, true) # top 2 peptide hits per query
|
|
193
|
+
# each_peptide(1) # top peptide hit per query
|
|
194
|
+
def each_peptide(*args, &block)
|
|
195
|
+
return to_enum(__method__, *args) unless block
|
|
196
|
+
(numeric, boolean) = args.partition {|arg| arg.is_a?(Numeric) }
|
|
197
|
+
top_n = numeric.first || Float::INFINITY
|
|
198
|
+
non_decoy = ((boolean.size > 0) ? boolean.first : true)
|
|
40
199
|
start_section!(non_decoy ? :peptides : :decoy_peptides)
|
|
41
|
-
|
|
42
|
-
|
|
200
|
+
Mspire::Mascot::Dat::Peptide.each(@io) do |peptide|
|
|
201
|
+
if peptide.peptide_num <= top_n
|
|
202
|
+
block.call(peptide)
|
|
203
|
+
end
|
|
43
204
|
end
|
|
44
205
|
end
|
|
45
206
|
|
|
207
|
+
def each_decoy_peptide(top_n=Float::INFINITY, &block)
|
|
208
|
+
each_peptide(false, top_n, &block)
|
|
209
|
+
end
|
|
210
|
+
|
|
46
211
|
# returns a list of all sections as symbols. The symbol :queries is
|
|
47
212
|
# returned rather than each query individually if there is 1 or more
|
|
48
213
|
# queries.
|
|
@@ -54,6 +219,8 @@ module Mspire
|
|
|
54
219
|
reply.map(&:to_sym)
|
|
55
220
|
end
|
|
56
221
|
|
|
222
|
+
alias_method :keys, :sections
|
|
223
|
+
|
|
57
224
|
end
|
|
58
225
|
end
|
|
59
226
|
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
|
|
2
|
+
module Mspire
|
|
3
|
+
module Mascot
|
|
4
|
+
class Dat
|
|
5
|
+
module Cast
|
|
6
|
+
TO_INT_ARRAY = ->(val) { val.split(',').map(&:to_i) }
|
|
7
|
+
FROM_CHARGE_STRING = ->(st) { (st[-1] << st[0...-1]).to_i }
|
|
8
|
+
CGI_UNESCAPE = ->(st) { CGI.unescape(st) }
|
|
9
|
+
FLOAT_PAIRS = ->(st) { st.split(',').map {|pair_s| pair_s.split(':').map(&:to_f) } }
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
module Castable
|
|
13
|
+
# expects a hash with the parameter and the way to cast it as a symbol
|
|
14
|
+
# (e.g., :to_f or a lambda). If no hash given, will attempt to
|
|
15
|
+
# retrieve a class constant 'CAST' which defines the casts.
|
|
16
|
+
def cast!(cast_hash=nil)
|
|
17
|
+
hash = cast_hash || self.class.const_get('CAST')
|
|
18
|
+
self.each_pair do |k,v|
|
|
19
|
+
if cast=hash[k]
|
|
20
|
+
apply = cast.is_a?(Symbol) ? cast.to_proc : cast
|
|
21
|
+
self[k] = apply[v] if apply
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
self
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require 'mspire/mascot/dat/section/key_val'
|
|
2
|
+
require 'mspire/mascot/dat/cast'
|
|
3
|
+
|
|
4
|
+
module Mspire
|
|
5
|
+
module Mascot
|
|
6
|
+
class Dat
|
|
7
|
+
# The header is a hash with some casting (see CAST) and is
|
|
8
|
+
# accessible with lowercase Symbol keys.
|
|
9
|
+
class Header < Hash
|
|
10
|
+
include Section::KeyVal
|
|
11
|
+
include Castable
|
|
12
|
+
|
|
13
|
+
CAST = {
|
|
14
|
+
sequences: :to_i,
|
|
15
|
+
sequences_after_tax: :to_i,
|
|
16
|
+
residues: :to_i,
|
|
17
|
+
distribution: Cast::TO_INT_ARRAY,
|
|
18
|
+
distribution_decoy: Cast::TO_INT_ARRAY,
|
|
19
|
+
queries: :to_i,
|
|
20
|
+
max_hits: :to_i,
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
require '
|
|
1
|
+
require 'json'
|
|
2
2
|
|
|
3
3
|
module Mspire
|
|
4
4
|
module Mascot
|
|
@@ -6,8 +6,16 @@ module Mspire
|
|
|
6
6
|
# makes a byte index (not line index)
|
|
7
7
|
class Index
|
|
8
8
|
|
|
9
|
+
INDEX_EXT = '.byteindex'
|
|
10
|
+
|
|
11
|
+
class << self
|
|
12
|
+
def index_filename(file)
|
|
13
|
+
file + Dat::INDEX_EXT
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
9
17
|
# the hash holding the start byte for each section (besides the
|
|
10
|
-
# queries)
|
|
18
|
+
# queries). Keyed by symbol.
|
|
11
19
|
attr_accessor :byte_num
|
|
12
20
|
|
|
13
21
|
# the array holding the start byte for each query. It is indexed by
|
|
@@ -17,11 +25,13 @@ module Mspire
|
|
|
17
25
|
# an array of the query nums
|
|
18
26
|
attr_accessor :query_nums
|
|
19
27
|
|
|
20
|
-
|
|
28
|
+
|
|
29
|
+
# creates an empty index; populate it with #from_io!, #from_file! or
|
|
30
|
+
# #from_byteindex!
|
|
31
|
+
def initialize
|
|
21
32
|
@byte_num = {}
|
|
22
33
|
@query_num_to_byte = []
|
|
23
34
|
@query_nums = []
|
|
24
|
-
from_io(io) if io
|
|
25
35
|
end
|
|
26
36
|
|
|
27
37
|
def has_queries?
|
|
@@ -29,7 +39,32 @@ module Mspire
|
|
|
29
39
|
end
|
|
30
40
|
|
|
31
41
|
# returns self
|
|
32
|
-
def
|
|
42
|
+
def from_byteindex!(filename)
|
|
43
|
+
hash = JSON.parse!( IO.read(filename) )
|
|
44
|
+
[:byte_num, :query_num_to_byte, :query_nums].each do |key|
|
|
45
|
+
self.send("#{key}=", hash[key.to_s])
|
|
46
|
+
end
|
|
47
|
+
@byte_num.keys.each {|k| @byte_num[k.to_sym] = @byte_num.delete(k) }
|
|
48
|
+
self
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def write(filename)
|
|
52
|
+
File.open(filename,'w') do |io|
|
|
53
|
+
JSON.dump(
|
|
54
|
+
{
|
|
55
|
+
byte_num: byte_num,
|
|
56
|
+
query_num_to_byte: query_num_to_byte,
|
|
57
|
+
query_nums: query_nums,
|
|
58
|
+
}, io)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def from_file!(filename)
|
|
63
|
+
File.open(filename) {|io| from_io!(io) }
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# returns self
|
|
67
|
+
def from_io!(io)
|
|
33
68
|
io.rewind
|
|
34
69
|
while line=io.gets
|
|
35
70
|
io.each_line do |line|
|
|
@@ -43,7 +78,7 @@ module Mspire
|
|
|
43
78
|
@query_nums << query_num
|
|
44
79
|
@query_num_to_byte[query_num] = pos
|
|
45
80
|
else
|
|
46
|
-
@byte_num[head] = pos
|
|
81
|
+
@byte_num[head.to_sym] = pos
|
|
47
82
|
end
|
|
48
83
|
end
|
|
49
84
|
end
|
|
@@ -63,7 +98,7 @@ module Mspire
|
|
|
63
98
|
if key.is_a?(Integer)
|
|
64
99
|
@query_num_to_byte[key]
|
|
65
100
|
else
|
|
66
|
-
@byte_num[key.
|
|
101
|
+
@byte_num[key.to_sym]
|
|
67
102
|
end
|
|
68
103
|
end
|
|
69
104
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
|
|
2
|
+
require 'mspire/mascot/dat/section/key_val'
|
|
3
|
+
#require 'mspire/mascot/dat/cast'
|
|
4
|
+
|
|
5
|
+
module Mspire
|
|
6
|
+
module Mascot
|
|
7
|
+
class Dat
|
|
8
|
+
class Masses < Hash
|
|
9
|
+
include Section::KeyVal
|
|
10
|
+
#include Castable
|
|
11
|
+
|
|
12
|
+
#CAST = {
|
|
13
|
+
# 'TOL' => :to_f,
|
|
14
|
+
# 'ITOL' => :to_f,
|
|
15
|
+
# 'PFA' => :to_i,
|
|
16
|
+
# 'CHARGE' => Cast::FROM_CHARGE_STRING,
|
|
17
|
+
#}
|
|
18
|
+
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
require 'mspire/mascot/dat/section/key_val'
|
|
2
|
+
require 'mspire/mascot/dat/cast'
|
|
3
|
+
|
|
4
|
+
module Mspire
|
|
5
|
+
module Mascot
|
|
6
|
+
class Dat
|
|
7
|
+
class Parameters < Hash
|
|
8
|
+
include Section::KeyVal
|
|
9
|
+
include Castable
|
|
10
|
+
|
|
11
|
+
CAST = {
|
|
12
|
+
'TOL' => :to_f,
|
|
13
|
+
'ITOL' => :to_f,
|
|
14
|
+
'PFA' => :to_i,
|
|
15
|
+
'CHARGE' => Cast::FROM_CHARGE_STRING,
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -1,55 +1,87 @@
|
|
|
1
|
+
require 'mspire/mascot/dat/cast'
|
|
1
2
|
|
|
2
3
|
module Mspire
|
|
3
4
|
module Mascot
|
|
4
5
|
class Dat
|
|
5
6
|
# mr = relative molecular mass; data contains keys of relative
|
|
6
7
|
Peptide = Struct.new(:missed_cleavages, :mr, :delta, :num_ions_matched, :seq, :peaks_from_ions_1, :var_mods_string, :ions_score, :ion_series_found, :peaks_from_ions_2, :peaks_from_ions_3, :query_num, :peptide_num, :proteins, :data) do
|
|
7
|
-
|
|
8
|
+
include Castable
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# if it reaches the end of the section or it is a blank line
|
|
17
|
-
def self.from_io(io, proteins=false, data=false)
|
|
18
|
-
finished = ->(line) { line.size < 2 || line[0,2] == '--' }
|
|
10
|
+
|
|
11
|
+
# reads the next line. If it contains valid query information returns
|
|
12
|
+
# an array [query_num, peptide_num, info_tag, value]. If it contains no valid
|
|
13
|
+
# query information, resets the io position to the beginning of the
|
|
14
|
+
# string and returns nil.
|
|
15
|
+
def self.next_qp_data(io)
|
|
16
|
+
before = io.pos
|
|
19
17
|
line = io.readline("\n")
|
|
20
|
-
if
|
|
18
|
+
if line[0,2] == '--'
|
|
19
|
+
io.pos = before
|
|
21
20
|
nil
|
|
22
21
|
else
|
|
23
|
-
|
|
24
|
-
(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
22
|
+
line.chomp!
|
|
23
|
+
(qpstring, value) = line.split('=',2)
|
|
24
|
+
(qns, pns, info_tag) = qpstring.split('_', 3)
|
|
25
|
+
(qnum, pnum) = [qns, pns].map {|ns| ns[1..-1].to_i }
|
|
26
|
+
[qnum, pnum, info_tag, value]
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# given the value part of the initial peptide data (q1_p1=<value>),
|
|
31
|
+
# sets the object's properties. returns the pephit
|
|
32
|
+
def self.from_value_string(value, qnum, pnum)
|
|
33
|
+
(core, prots) = value.split(';', 2)
|
|
34
|
+
pephit = self.new(*core.split(','), qnum, pnum)
|
|
35
|
+
pephit.cast!
|
|
36
|
+
pephit
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# returns the query num and peptide num and info_tag and string. nil if they don't exist.
|
|
40
|
+
def self.dissect_line(line)
|
|
41
|
+
if md=/q(\d+)_p_?(\d+)(\w*)=(.*)/.match(line)
|
|
42
|
+
[md[1].to_i, md[2].to_i, md[3], md[4]]
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# returns each peptide hit. Some queries will not have *any* hits,
|
|
48
|
+
# and these are *not* yielded.
|
|
49
|
+
def self.each(io, &block)
|
|
50
|
+
return to_enum(__method__, io) unless block
|
|
51
|
+
before = io.pos
|
|
52
|
+
peptide = nil
|
|
53
|
+
while reply=dissect_line(io.readline("\n"))
|
|
54
|
+
(qnum, pnum, info_tag, value) = reply
|
|
55
|
+
if info_tag == ''
|
|
56
|
+
track_pos = io.pos
|
|
57
|
+
block.call(peptide) if peptide # yield the previous peptide
|
|
58
|
+
io.pos = track_pos
|
|
59
|
+
peptide =
|
|
60
|
+
(value == "-1") ? nil : self.from_value_string(value, qnum, pnum)
|
|
61
|
+
else
|
|
62
|
+
# implement reading in future
|
|
48
63
|
end
|
|
49
|
-
|
|
64
|
+
before = io.pos
|
|
50
65
|
end
|
|
66
|
+
# yield that last peptide
|
|
67
|
+
|
|
68
|
+
track_pos = io.pos
|
|
69
|
+
block.call(peptide) if peptide
|
|
70
|
+
io.pos = track_pos
|
|
51
71
|
end
|
|
52
72
|
end
|
|
73
|
+
class Peptide
|
|
74
|
+
CAST = {
|
|
75
|
+
missed_cleavages: :to_i,
|
|
76
|
+
mr: :to_f,
|
|
77
|
+
delta: :to_f,
|
|
78
|
+
num_ions_matched: :to_i,
|
|
79
|
+
ions: :string,
|
|
80
|
+
ions_score: :to_f,
|
|
81
|
+
peaks_from_ions_2: :to_i,
|
|
82
|
+
peaks_from_ions_3: :to_i,
|
|
83
|
+
}
|
|
84
|
+
end
|
|
53
85
|
end
|
|
54
86
|
end
|
|
55
87
|
end
|
|
@@ -2,14 +2,17 @@ require 'ostruct'
|
|
|
2
2
|
require 'delegate'
|
|
3
3
|
require 'cgi'
|
|
4
4
|
|
|
5
|
+
require 'mspire/mascot/dat/cast'
|
|
6
|
+
|
|
5
7
|
module Mspire
|
|
6
8
|
module Mascot
|
|
7
9
|
class Dat
|
|
8
10
|
class Query < Hash
|
|
11
|
+
include Castable
|
|
9
12
|
|
|
10
13
|
CAST = {
|
|
11
|
-
charge:
|
|
12
|
-
title:
|
|
14
|
+
charge: Cast::FROM_CHARGE_STRING,
|
|
15
|
+
title: Cast::CGI_UNESCAPE,
|
|
13
16
|
mass_min: :to_f,
|
|
14
17
|
mass_max: :to_f,
|
|
15
18
|
int_min: :to_f,
|
|
@@ -17,27 +20,19 @@ module Mspire
|
|
|
17
20
|
num_vals: :to_i,
|
|
18
21
|
num_used1: :to_i,
|
|
19
22
|
index: :to_i,
|
|
20
|
-
Ions1:
|
|
23
|
+
Ions1: Cast::FLOAT_PAIRS,
|
|
21
24
|
}
|
|
22
25
|
|
|
23
26
|
# returns self
|
|
24
|
-
def
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
break if line[0,2] == '--'
|
|
28
|
-
line.chomp!
|
|
29
|
-
(key, val) = line.split('=')
|
|
30
|
-
query[key.to_sym] = val
|
|
31
|
-
end
|
|
32
|
-
query.each do |k,v|
|
|
33
|
-
if cast=CAST[k]
|
|
34
|
-
apply = cast.is_a?(Symbol) ? cast.to_proc : cast
|
|
35
|
-
query[k] = apply[v] if apply
|
|
36
|
-
end
|
|
27
|
+
def from_io!(io)
|
|
28
|
+
Dat.each_key_val(io) do |key,val|
|
|
29
|
+
self[key.to_sym] = val
|
|
37
30
|
end
|
|
38
|
-
|
|
31
|
+
cast!
|
|
39
32
|
end
|
|
40
33
|
|
|
34
|
+
def peaks() self[:Ions1] end
|
|
35
|
+
|
|
41
36
|
def method_missing(*args, &block)
|
|
42
37
|
if args[0].to_s[-1] == '='
|
|
43
38
|
if self.key?(args[0...-1])
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
module Mspire
|
|
3
|
+
module Mascot
|
|
4
|
+
class Dat
|
|
5
|
+
module Section
|
|
6
|
+
module KeyVal
|
|
7
|
+
def from_io!(io, as_symbols=true)
|
|
8
|
+
Dat.each_key_val(io) do |key,val|
|
|
9
|
+
self[ as_symbols ? key.to_sym : key ] = val
|
|
10
|
+
end
|
|
11
|
+
self.send(:cast!) if self.respond_to?(:cast!)
|
|
12
|
+
self
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -2,16 +2,18 @@ require 'spec_helper'
|
|
|
2
2
|
|
|
3
3
|
require 'mspire/mascot/dat/index'
|
|
4
4
|
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
|
|
5
7
|
describe 'Mspire::Mascot::Dat::Index being initialized from file' do
|
|
6
8
|
|
|
7
9
|
let(:io) { File.open(TESTFILES + "/F004128.dat") }
|
|
8
10
|
|
|
9
11
|
specify '#initialize(io) creates the index object' do
|
|
10
|
-
Mspire::Mascot::Dat::Index.new(io).should be_a(Mspire::Mascot::Dat::Index)
|
|
12
|
+
Mspire::Mascot::Dat::Index.new.from_io!(io).should be_a(Mspire::Mascot::Dat::Index)
|
|
11
13
|
end
|
|
12
14
|
|
|
13
15
|
describe Mspire::Mascot::Dat::Index do
|
|
14
|
-
subject { Mspire::Mascot::Dat::Index.new(io) }
|
|
16
|
+
subject { Mspire::Mascot::Dat::Index.new.from_io!(io) }
|
|
15
17
|
|
|
16
18
|
it 'can access the header start byte nums' do
|
|
17
19
|
|
|
@@ -40,5 +42,18 @@ describe 'Mspire::Mascot::Dat::Index being initialized from file' do
|
|
|
40
42
|
subject['peptides'].should == 41624
|
|
41
43
|
end
|
|
42
44
|
|
|
45
|
+
it 'can write the index info and create an identical object from the file' do
|
|
46
|
+
spec_tmpdir do |tmpdir|
|
|
47
|
+
bytefile = tmpdir + "/index_bytefile.tmp"
|
|
48
|
+
subject.write( bytefile )
|
|
49
|
+
File.exist?( bytefile ).should be_true
|
|
50
|
+
File.size( bytefile ).should be > 0
|
|
51
|
+
fromfile = Mspire::Mascot::Dat::Index.new.from_byteindex!( bytefile )
|
|
52
|
+
[:byte_num, :query_num_to_byte, :query_nums].each do |methd|
|
|
53
|
+
fromfile.send(methd).should == subject.send(methd)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
43
58
|
end
|
|
44
59
|
end
|
|
@@ -1,24 +1,59 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
2
|
|
|
3
|
+
require 'mspire/mascot/dat'
|
|
3
4
|
require 'mspire/mascot/dat/peptide'
|
|
4
5
|
|
|
5
|
-
describe 'reading off
|
|
6
|
+
describe 'reading off the peptides' do
|
|
6
7
|
|
|
7
|
-
before(:
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@io
|
|
8
|
+
before(:each) do
|
|
9
|
+
file = TESTFILES + '/F004129.dat'
|
|
10
|
+
@io = File.open(file)
|
|
11
|
+
@io.pos = 62743
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
after(:each) do
|
|
15
|
+
@io.close
|
|
15
16
|
end
|
|
16
17
|
|
|
17
|
-
it '
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
peptide.send(k).should == v
|
|
18
|
+
it 'has an iterator' do
|
|
19
|
+
info = Mspire::Mascot::Dat::Peptide.each(@io).map do |pep|
|
|
20
|
+
[pep.ions_score, pep.query_num, pep.peptide_num]
|
|
21
21
|
end
|
|
22
|
+
info.should == [[0.22, 1, 1], [4.11, 2, 1], [2.84, 2, 2], [2.83, 2, 3], [2.65, 2, 4], [2.28, 2, 5], [1.07, 2, 6], [0.99, 2, 7], [0.96, 2, 8], [0.65, 2, 9], [0.63, 2, 10]]
|
|
22
23
|
end
|
|
23
24
|
|
|
24
25
|
end
|
|
26
|
+
|
|
27
|
+
=begin
|
|
28
|
+
q1_p1=0,2113.999008,-0.998912,4,VMLSDADPSLEQYYVNVR,17,00100000000000000000,0.22,0002002000000000000,0,0;"Q23985":0:79:96:1
|
|
29
|
+
q1_p1_terms=R,T
|
|
30
|
+
q1_p1_primary_nl=00100000000000000000
|
|
31
|
+
q2_p1=0,2978.269196,1.195840,5,MDSSSGSQGNGSFMDQNSLGILNMDNLK,17,000000000000001000000000100000,4.11,0002000020000000000,0,0;"Q9VV79":0:1:28:1
|
|
32
|
+
q2_p1_terms=-,V
|
|
33
|
+
q2_p1_primary_nl=000000000000002000000000200000
|
|
34
|
+
q2_p2=1,2979.449478,0.015558,5,STGAESSEEXLREAYIMASVEHVNLLK,45,00000000000000000100000000000,2.84,0000000020000000000,0,0;"Q6SAG3":0:875:901:1
|
|
35
|
+
q2_p2_terms=K,L
|
|
36
|
+
q2_p2_primary_nl=00000000000000000200000000000
|
|
37
|
+
q2_p2_subst=10,X,T
|
|
38
|
+
q2_p3=1,2980.525406,-1.060370,5,LSSPPSTSHTYEGKLLTKPTHTNTDLR,45,00000000000000000000000000000,2.83,0000000020000000000,0,0;"Q6KEU5":0:82:108:1,"Q6KEU6":0:82:108:1
|
|
39
|
+
q2_p3_terms=K,G:K,G
|
|
40
|
+
q2_p4=0,2978.269196,1.195840,4,MDSSSGSQGNGSFMDQNSLGILNMDNLK,31,010000000000000000000000100000,2.65,0000000020000000000,0,0;"Q9VV79":0:1:28:1
|
|
41
|
+
q2_p4_terms=-,V
|
|
42
|
+
q2_p4_primary_nl=020000000000000000000000200000
|
|
43
|
+
q2_p5=1,2980.380035,-0.914999,4,FGDMFSKESEQVALAVYEAYDPNVGSK,17,00000000000000000000000000000,2.28,0000002020000000000,0,0;"P82982":0:340:366:1
|
|
44
|
+
q2_p5_terms=K,S
|
|
45
|
+
q2_p6=0,2978.688492,0.776544,8,GAEFSSFSVVLLVIILIIVFLSNAYHK,118,00000000000000000000000000000,1.07,0000020000000000000,0,0;"A8DYF1":0:2:28:1
|
|
46
|
+
q2_p6_terms=M,A
|
|
47
|
+
q2_p7=1,2979.588989,-0.123953,4,KLQSNATVLSDGYAAHLAGLQAVGGSRPAK,45,00000000000000000000000000000000,0.99,0000020010000000000,0,0;"P43125":0:1187:1216:1,"P43125-2":0:1187:1216:1,"D2NUF3":0:172:201:1
|
|
48
|
+
q2_p7_terms=K,G:K,G:K,G
|
|
49
|
+
q2_p8=0,2980.229141,-0.764105,3,LMFGDEEGNLPSLDQEDEQVPETEED,31,0010000000000000000000000000,0.96,0002000000000000000,0,0;"Q9VVZ8":0:703:728:1
|
|
50
|
+
q2_p8_terms=R,-
|
|
51
|
+
q2_p8_primary_nl=0020000000000000000000000000
|
|
52
|
+
q2_p9=1,2978.364624,1.100412,2,LCPRCFQELSDYDTIMVNLMTTQK,17,00000000000000000000100000,0.65,0000002000000000000,0,0;"P07664":0:60:83:1
|
|
53
|
+
q2_p9_terms=R,R
|
|
54
|
+
q2_p9_primary_nl=00000000000000000000100000
|
|
55
|
+
q2_p10=0,2978.672592,0.792444,5,NGSSVAGTSVLSPSIPLTLVVLPALMIAQK,70,00000000000000000000000000100000,0.63,0000000020000000000,0,0;"B8A406":0:72:101:1,"C6SUW4":0:177:206:1,"A1Z9D9":0:246:275:1
|
|
56
|
+
q2_p10_terms=K,S:K,S:K,S
|
|
57
|
+
q2_p10_primary_nl=00000000000000000000000000100000
|
|
58
|
+
=end
|
|
59
|
+
|
|
@@ -24,8 +24,8 @@ END
|
|
|
24
24
|
@io = StringIO.new(data)
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
query = Mspire::Mascot::Dat::Query.from_io(@io)
|
|
27
|
+
specify '#from_io(io) returns a query object with appropriate casts' do
|
|
28
|
+
query = Mspire::Mascot::Dat::Query.new.from_io!(@io)
|
|
29
29
|
query.title.should == '1.2746.2746.2'
|
|
30
30
|
query.charge.should == -2
|
|
31
31
|
end
|
|
@@ -23,72 +23,172 @@ describe 'reading a dat file' do
|
|
|
23
23
|
@io.close
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
specify '#sections() returns all the sections (with queries considered a single group)' do
|
|
27
|
+
(sections=@dat.sections).should == [:parameters, :masses, :unimod, :enzyme, :header, :summary, :decoy_summary, :peptides, :decoy_peptides, :proteins, :index, :queries]
|
|
28
|
+
@dat.keys.should == sections
|
|
28
29
|
end
|
|
29
30
|
|
|
30
|
-
|
|
31
|
+
specify '#query(n) can retrieve queries at random' do
|
|
31
32
|
@dat.query(1).title.should == '1.2746.2746.2'
|
|
32
33
|
@dat.query(2).title.should == '1.2745.2745.4'
|
|
33
34
|
end
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
start = [ [1,1,'VMLSDADPSLEQYYVNVR'],
|
|
37
|
-
[2,1,'MDSSSGSQGNGSFMDQNSLGILNMDNLK'],
|
|
38
|
-
[2,2,'STGAESSEEXLREAYIMASVEHVNLLK'],
|
|
39
|
-
[2,3,'LSSPPSTSHTYEGKLLTKPTHTNTDLR'],
|
|
40
|
-
[2,4,'MDSSSGSQGNGSFMDQNSLGILNMDNLK']]
|
|
36
|
+
describe '#Dat[:<name>]' do
|
|
41
37
|
|
|
42
|
-
|
|
38
|
+
specify "#[:parameters] returns a hash-like object with proper casts" do
|
|
39
|
+
params = @dat[:parameters]
|
|
40
|
+
params.should be_a(Mspire::Mascot::Dat::Parameters)
|
|
41
|
+
params['LICENSE'].should == 'Licensed to: Brigham Young University, Provo, United States RCCZ-D4GH-S53W-2G5F-NG5L, (1 processor).'
|
|
42
|
+
params['IATOL'].should be_nil
|
|
43
|
+
params.key?('IATOL').should be_true
|
|
44
|
+
params.key?('silliness').should be_false
|
|
45
|
+
params['IT_MODS'].should == 'Oxidation (M)'
|
|
46
|
+
params['TOL'].should == 1.2
|
|
47
|
+
params['CHARGE'].should == 2
|
|
48
|
+
params['INTERNALS'].should == "0.0,700.0"
|
|
49
|
+
end
|
|
43
50
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
peptide.seq.should == aa
|
|
52
|
-
end
|
|
51
|
+
specify "#[:header] returns hash-like object with casts" do
|
|
52
|
+
header = @dat[:header]
|
|
53
|
+
header.should be_a(Mspire::Mascot::Dat::Header)
|
|
54
|
+
header[:sequences].should == 34724
|
|
55
|
+
header[:residues].should == 17622530
|
|
56
|
+
header[:distribution].should == [30914, 38, 61, 154, 203, 295, 417, 447, 500, 442, 360, 239, 168, 167, 98, 60, 39, 24, 15, 16, 14, 8, 7, 8, 5, 4, 7, 3, 3, 1, 1, 3, 1, 0, 1, 0, 1]
|
|
57
|
+
header[:release].should == 'GbetaCCT_drome.fasta'
|
|
53
58
|
end
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
# this proves that each_peptide can also return an enumerator if asked
|
|
61
|
-
cnts = @dat.each_peptide.with_index.map do |peptide,i|
|
|
62
|
-
peptide.should(be_a(Mspire::Mascot::Dat::Peptide)) && i
|
|
59
|
+
|
|
60
|
+
specify '#[:masses] returns key val pairs (uncast)' do
|
|
61
|
+
masses = @dat[:masses]
|
|
62
|
+
masses.should be_an(Mspire::Mascot::Dat::Masses)
|
|
63
|
+
masses['A'].should == '71.037114'
|
|
64
|
+
masses['FixedModResidues1'].should == 'C'
|
|
63
65
|
end
|
|
64
|
-
cnts.should == (0..10).to_a
|
|
65
|
-
end
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
67
|
+
specify '#[:unimod] returns as a string the entire section' do
|
|
68
|
+
unimod_string = @dat[:unimod]
|
|
69
|
+
lines = unimod_string.each_line.to_a
|
|
70
|
+
lines.first.chomp.should == '<?xml version="1.0" encoding="UTF-8" ?>'
|
|
71
|
+
lines[-2].chomp.should == '</umod:unimod>'
|
|
72
72
|
end
|
|
73
|
-
ions_score_target.should == [0.22, 4.11]
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
74
|
+
specify '#[:enzyme] returns as a string the entire section' do
|
|
75
|
+
enzyme_string = @dat[:enzyme]
|
|
76
|
+
lines = enzyme_string.each_line.to_a
|
|
77
|
+
lines.first.chomp.should == 'Title:Trypsin'
|
|
78
|
+
lines.last.chomp.should == '*'
|
|
79
79
|
end
|
|
80
|
-
ions_score_decoy.should == [3.52, 4.58]
|
|
81
80
|
end
|
|
82
81
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
82
|
+
describe 'iterators' do
|
|
83
|
+
|
|
84
|
+
describe 'each_<name>' do
|
|
85
|
+
|
|
86
|
+
specify '#each_query retrieves every query' do
|
|
87
|
+
queries = @dat.each_query.to_a
|
|
88
|
+
queries.size.should == 2
|
|
89
|
+
queries.first.title.should == '1.2746.2746.2'
|
|
90
|
+
queries.last.title.should == '1.2745.2745.4'
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
specify '#each_peptide can retrieve every peptide' do
|
|
94
|
+
start = [ [1,1,'VMLSDADPSLEQYYVNVR'],
|
|
95
|
+
[2,1,'MDSSSGSQGNGSFMDQNSLGILNMDNLK'],
|
|
96
|
+
[2,2,'STGAESSEEXLREAYIMASVEHVNLLK'],
|
|
97
|
+
[2,3,'LSSPPSTSHTYEGKLLTKPTHTNTDLR'],
|
|
98
|
+
[2,4,'MDSSSGSQGNGSFMDQNSLGILNMDNLK']]
|
|
99
|
+
|
|
100
|
+
last = [2,10,'NGSSVAGTSVLSPSIPLTLVVLPALMIAQK']
|
|
101
|
+
|
|
102
|
+
last_pep = nil
|
|
103
|
+
@dat.each_peptide do |peptide|
|
|
104
|
+
last_pep = peptide
|
|
105
|
+
(qnum, pnum, aa) = start.shift
|
|
106
|
+
if qnum
|
|
107
|
+
peptide.query_num.should == qnum
|
|
108
|
+
peptide.peptide_num.should == pnum
|
|
109
|
+
peptide.seq.should == aa
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
(qnum, pnum, aa) = last
|
|
113
|
+
peptide = last_pep
|
|
114
|
+
peptide.query_num.should == qnum
|
|
115
|
+
peptide.peptide_num.should == pnum
|
|
116
|
+
peptide.seq.should == aa
|
|
117
|
+
|
|
118
|
+
# this proves that each_peptide can also return an enumerator if asked
|
|
119
|
+
cnts = @dat.each_peptide.with_index.map do |peptide,i|
|
|
120
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide)) && i
|
|
121
|
+
end
|
|
122
|
+
cnts.should == (0..10).to_a
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
specify '#each_peptide(true/false) can retrieve normal/decoy peptides' do
|
|
126
|
+
ions_score_target = @dat.each_peptide(true, 1).map do |peptide|
|
|
127
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
|
128
|
+
peptide.peptide_num.should == 1
|
|
129
|
+
peptide.ions_score
|
|
130
|
+
end
|
|
131
|
+
ions_score_target.should == [0.22, 4.11]
|
|
132
|
+
|
|
133
|
+
[:to_a, :reverse].each do |ar_order|
|
|
134
|
+
args = [1, false].send(ar_order)
|
|
135
|
+
ions_score_decoy = @dat.each_peptide(*args).map do |peptide|
|
|
136
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
|
137
|
+
peptide.peptide_num.should == 1
|
|
138
|
+
peptide.ions_score
|
|
139
|
+
end
|
|
140
|
+
ions_score_decoy.should == [3.52, 4.58]
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
specify '#each_peptide(n) can retrieve just the top n peptides' do
|
|
145
|
+
n = 1
|
|
146
|
+
cnt = 0
|
|
147
|
+
@dat.each_peptide(n) do |peptide|
|
|
148
|
+
cnt += 1
|
|
149
|
+
peptide.should(be_a(Mspire::Mascot::Dat::Peptide))
|
|
150
|
+
peptide.query_num.should == cnt
|
|
151
|
+
peptide.peptide_num.should == 1
|
|
152
|
+
end
|
|
153
|
+
end
|
|
91
154
|
end
|
|
92
|
-
end
|
|
93
155
|
|
|
156
|
+
describe '#[:<name>] iterators' do
|
|
157
|
+
|
|
158
|
+
specify '#[:peptides] returns an enumerator' do
|
|
159
|
+
@dat[:peptides].should be_an(Enumerator)
|
|
160
|
+
@dat[:peptides].map(&:peptide_num).should == [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
|
161
|
+
@dat[:peptides].map(&:query_num).should == [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
|
162
|
+
@dat[:peptides].map(&:ions_score).should == [0.22, 4.11, 2.84, 2.83, 2.65, 2.28, 1.07, 0.99, 0.96, 0.65, 0.63]
|
|
163
|
+
@dat[:peptides, true, 1].map(&:peptide_num).should == [1,1]
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
specify '#[:decoy_peptides] returns an enumerator (or takes a block)' do
|
|
167
|
+
@dat[:decoy_peptides].should be_an(Enumerator)
|
|
168
|
+
@dat[:decoy_peptides].map(&:peptide_num).should == [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
|
169
|
+
@dat[:decoy_peptides].map(&:query_num).should == [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
|
170
|
+
@dat[:decoy_peptides].map(&:ions_score).should == [3.52, 4.58, 3.46, 3.3, 3.05, 3.05, 2.99, 2.97, 2.97, 2.87, 2.87]
|
|
171
|
+
@dat[:decoy_peptides, 1].map(&:peptide_num).should == [1,1]
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
specify '#[:queries] returns an enumerator (or takes a block)' do
|
|
175
|
+
@dat[:queries].should be_an(Enumerator)
|
|
176
|
+
@dat[:queries].map(&:title).should == ["1.2746.2746.2", "1.2745.2745.4"]
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
specify '#[:proteins] returns an enumerator (or takes a block)' do
|
|
180
|
+
data = [
|
|
181
|
+
["Q9VV79", 125605.17, "BcDNA.LD24702 OS=Drosophila melanogaster GN=spd-2 PE=1 SV=2"],
|
|
182
|
+
["Q23985", 82989.73, "Protein deltex OS=Drosophila melanogaster GN=dx PE=1 SV=2"]
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
@dat[:proteins].each do |protein|
|
|
186
|
+
exp = data.shift
|
|
187
|
+
protein.accession.should == exp.shift
|
|
188
|
+
protein.mw.should == exp.shift
|
|
189
|
+
protein.description.should == exp.shift
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
94
194
|
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -1,11 +1,27 @@
|
|
|
1
1
|
require 'rspec'
|
|
2
2
|
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
|
|
3
5
|
# Requires supporting files with custom matchers and macros, etc,
|
|
4
6
|
# in ./support/ and its subdirectories.
|
|
5
7
|
#Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
|
6
8
|
|
|
9
|
+
SPEC_DIR = File.dirname(__FILE__)
|
|
10
|
+
|
|
7
11
|
RSpec.configure do |config|
|
|
8
12
|
config.treat_symbols_as_metadata_keys_with_true_values = true
|
|
13
|
+
config.formatter = :documentation
|
|
9
14
|
end
|
|
10
15
|
|
|
11
|
-
TESTFILES =
|
|
16
|
+
TESTFILES = SPEC_DIR + "/testfiles"
|
|
17
|
+
|
|
18
|
+
# creates a tmpdir, passes it into the block as a full path, then destroys at
|
|
19
|
+
# close of block. Returns whatever was returned by the block.
|
|
20
|
+
def spec_tmpdir(&block)
|
|
21
|
+
dir = File.expand_path(SPEC_DIR + "/tmp")
|
|
22
|
+
FileUtils.rm_rf( dir )
|
|
23
|
+
FileUtils.mkdir( dir )
|
|
24
|
+
reply = block.call(dir)
|
|
25
|
+
FileUtils.rm_rf( dir )
|
|
26
|
+
reply
|
|
27
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mspire-mascot-dat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,24 +9,8 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-
|
|
12
|
+
date: 2013-04-11 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
|
-
- !ruby/object:Gem::Dependency
|
|
15
|
-
name: elif
|
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
|
17
|
-
none: false
|
|
18
|
-
requirements:
|
|
19
|
-
- - ~>
|
|
20
|
-
- !ruby/object:Gem::Version
|
|
21
|
-
version: 0.1.0
|
|
22
|
-
type: :runtime
|
|
23
|
-
prerelease: false
|
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
none: false
|
|
26
|
-
requirements:
|
|
27
|
-
- - ~>
|
|
28
|
-
- !ruby/object:Gem::Version
|
|
29
|
-
version: 0.1.0
|
|
30
14
|
- !ruby/object:Gem::Dependency
|
|
31
15
|
name: rspec
|
|
32
16
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -90,10 +74,15 @@ files:
|
|
|
90
74
|
- Rakefile
|
|
91
75
|
- VERSION
|
|
92
76
|
- lib/mspire/mascot/dat.rb
|
|
77
|
+
- lib/mspire/mascot/dat/cast.rb
|
|
78
|
+
- lib/mspire/mascot/dat/header.rb
|
|
93
79
|
- lib/mspire/mascot/dat/index.rb
|
|
80
|
+
- lib/mspire/mascot/dat/masses.rb
|
|
81
|
+
- lib/mspire/mascot/dat/parameters.rb
|
|
94
82
|
- lib/mspire/mascot/dat/peptide.rb
|
|
83
|
+
- lib/mspire/mascot/dat/protein.rb
|
|
95
84
|
- lib/mspire/mascot/dat/query.rb
|
|
96
|
-
- mspire
|
|
85
|
+
- lib/mspire/mascot/dat/section/key_val.rb
|
|
97
86
|
- spec/mspire/mascot/dat/index_spec.rb
|
|
98
87
|
- spec/mspire/mascot/dat/peptide_spec.rb
|
|
99
88
|
- spec/mspire/mascot/dat/query_spec.rb
|
data/mspire-mascot-dat.gemspec
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# Generated by jeweler
|
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
|
-
# -*- encoding: utf-8 -*-
|
|
5
|
-
|
|
6
|
-
Gem::Specification.new do |s|
|
|
7
|
-
s.name = "mspire-mascot-dat"
|
|
8
|
-
s.version = "0.0.1"
|
|
9
|
-
|
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
-
s.authors = ["John T. Prince"]
|
|
12
|
-
s.date = "2013-03-28"
|
|
13
|
-
s.description = "Reads mascot dat files with gusto for mspire library."
|
|
14
|
-
s.email = "jtprince@gmail.com"
|
|
15
|
-
s.extra_rdoc_files = [
|
|
16
|
-
"LICENSE.txt",
|
|
17
|
-
"README.md"
|
|
18
|
-
]
|
|
19
|
-
s.files = [
|
|
20
|
-
".document",
|
|
21
|
-
".rspec",
|
|
22
|
-
"LICENSE.txt",
|
|
23
|
-
"README.md",
|
|
24
|
-
"Rakefile",
|
|
25
|
-
"VERSION",
|
|
26
|
-
"lib/mspire/mascot/dat.rb",
|
|
27
|
-
"lib/mspire/mascot/dat/index.rb",
|
|
28
|
-
"lib/mspire/mascot/dat/peptide.rb",
|
|
29
|
-
"lib/mspire/mascot/dat/query.rb",
|
|
30
|
-
"mspire-mascot-dat.gemspec",
|
|
31
|
-
"spec/mspire/mascot/dat/index_spec.rb",
|
|
32
|
-
"spec/mspire/mascot/dat/peptide_spec.rb",
|
|
33
|
-
"spec/mspire/mascot/dat/query_spec.rb",
|
|
34
|
-
"spec/mspire/mascot/dat_spec.rb",
|
|
35
|
-
"spec/reference/dat_format_reference.md",
|
|
36
|
-
"spec/reference/two_spectra_decoy_F004129.png",
|
|
37
|
-
"spec/reference/two_spectra_no_decoy_F004128.png",
|
|
38
|
-
"spec/spec_helper.rb",
|
|
39
|
-
"spec/testfiles/F004128.dat",
|
|
40
|
-
"spec/testfiles/F004129.dat",
|
|
41
|
-
"spec/testfiles/two_spectra.mgf"
|
|
42
|
-
]
|
|
43
|
-
s.homepage = "http://github.com/princelab/mspire-mascot-dat"
|
|
44
|
-
s.licenses = ["MIT"]
|
|
45
|
-
s.require_paths = ["lib"]
|
|
46
|
-
s.rubygems_version = "1.8.23"
|
|
47
|
-
s.summary = "Reads mascot dat files for mspire library."
|
|
48
|
-
|
|
49
|
-
if s.respond_to? :specification_version then
|
|
50
|
-
s.specification_version = 3
|
|
51
|
-
|
|
52
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
53
|
-
s.add_runtime_dependency(%q<elif>, ["~> 0.1.0"])
|
|
54
|
-
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
55
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
|
56
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
|
57
|
-
else
|
|
58
|
-
s.add_dependency(%q<elif>, ["~> 0.1.0"])
|
|
59
|
-
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
60
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
61
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
|
62
|
-
end
|
|
63
|
-
else
|
|
64
|
-
s.add_dependency(%q<elif>, ["~> 0.1.0"])
|
|
65
|
-
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
66
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
67
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|