mascot-dat 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/Rakefile +3 -4
- data/lib/mascot/dat/masses.rb +52 -12
- data/lib/mascot/dat/psm.rb +0 -2
- data/lib/mascot/dat/version.rb +1 -1
- data/lib/mascot/dat.rb +22 -18
- data/test/fixtures/query23_peaks.dmp +1 -0
- data/test/test_mascot-dat-helper.rb +3 -0
- data/test/test_mascot-dat-masses.rb +2 -0
- data/test/test_mascot-dat.rb +8 -1
- metadata +10 -8
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -8,11 +8,10 @@ Rake::TestTask.new do |t|
|
|
8
8
|
t.verbose = true
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
11
|
# documentation
|
13
12
|
require 'yard'
|
14
13
|
require 'yard/rake/yardoc_task'
|
15
14
|
YARD::Rake::YardocTask.new do |t|
|
16
|
-
t.files = ['lib/**/*.rb']
|
17
|
-
t.options =
|
18
|
-
end
|
15
|
+
t.files = ['lib/**/*.rb', '-' , "README.rdoc","LICENSE"]
|
16
|
+
t.options = ["-r","README.rdoc"]
|
17
|
+
end
|
data/lib/mascot/dat/masses.rb
CHANGED
@@ -1,23 +1,60 @@
|
|
1
1
|
module Mascot
|
2
2
|
class DAT
|
3
|
+
|
4
|
+
# A Hash of the mass table section. See the {#masstable masstable} instance method for details.
|
3
5
|
class Masses
|
6
|
+
# The main table of masses. Given the following examples from a DAT file:
|
7
|
+
#
|
8
|
+
# W=186.079313
|
9
|
+
# X=111.000000
|
10
|
+
# Y=163.063329
|
11
|
+
# Z=128.550590
|
12
|
+
# Hydrogen=1.007825
|
13
|
+
# Carbon=12.000000
|
14
|
+
#
|
15
|
+
# You can access the value for Hydrogen as:
|
16
|
+
# mydat.masses.masstable[:Hydrogen] # => 1.007825
|
17
|
+
# or
|
18
|
+
# mydat.masses.m[:Hydrogen] # => 1.007825
|
19
|
+
# or
|
20
|
+
# mydat.masses.Hydrogen # => 1.007825
|
21
|
+
attr_reader :masstable
|
22
|
+
# def masstable
|
23
|
+
# @masstable
|
24
|
+
# end
|
25
|
+
alias_method :m, :masstable
|
4
26
|
|
5
|
-
#
|
27
|
+
# A subset of the mass table defining the variable modifications. For
|
28
|
+
# example, given the following delta in a DAT file:
|
29
|
+
#
|
30
|
+
# delta1=15.994915,Oxidation (M)
|
31
|
+
#
|
32
|
+
# Then the following gets defined:
|
33
|
+
#
|
34
|
+
# @deltas = [[15.994915,"Oxidation (M)"], ... ]
|
35
|
+
#
|
6
36
|
attr_reader :deltas
|
37
|
+
alias_method :mods, :deltas
|
38
|
+
alias_method :d, :deltas
|
7
39
|
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
# A Hash of the mass table. E.g.
|
40
|
+
# A subset of the mass table defining the fixed modifications. For
|
41
|
+
# example:
|
12
42
|
#
|
13
|
-
#
|
43
|
+
# FixedMod1=57.021464,Carbamidomethyl (C)
|
44
|
+
# FixedModResidues1=C
|
14
45
|
#
|
15
|
-
|
46
|
+
# Then the following gets defined:
|
47
|
+
#
|
48
|
+
# @fixed_modifications = [[57.021464, "Carbamidomethyl (C)", "C"], ...]
|
49
|
+
#
|
50
|
+
attr_reader :fixed_modifications
|
51
|
+
alias_method :fixed_mods, :fixed_modifications
|
52
|
+
alias_method :f, :fixed_modifications
|
16
53
|
|
17
54
|
def initialize masses_section
|
18
55
|
@masstable = {}
|
19
56
|
@deltas = []
|
20
|
-
@
|
57
|
+
@fixed_modifications = []
|
21
58
|
|
22
59
|
masses_section.split("\n").each do |l|
|
23
60
|
k,v = l.split("=")
|
@@ -32,16 +69,19 @@ module Mascot
|
|
32
69
|
idx = $2.to_i - 1
|
33
70
|
if $1.empty?
|
34
71
|
# new fixed mod record
|
35
|
-
@
|
36
|
-
@
|
37
|
-
@masstable[k.to_sym] = @
|
72
|
+
@fixed_modifications[idx] = v.split(",")
|
73
|
+
@fixed_modifications[idx][0] = @fixed_modifications[idx][0].to_f
|
74
|
+
@masstable[k.to_sym] = @fixed_modifications[idx][0]
|
38
75
|
else
|
39
76
|
# append the modified residue to the array
|
40
|
-
@
|
77
|
+
@fixed_modifications[idx] << v
|
41
78
|
end
|
42
79
|
else
|
43
80
|
@masstable[k.to_sym] = v.to_f
|
44
81
|
end
|
82
|
+
@masstable.keys.each do |m|
|
83
|
+
self.class.send(:define_method,m, lambda { @masstable[m] })
|
84
|
+
end
|
45
85
|
end
|
46
86
|
end
|
47
87
|
end
|
data/lib/mascot/dat/psm.rb
CHANGED
@@ -39,10 +39,8 @@ module Mascot
|
|
39
39
|
k,v = l.split "="
|
40
40
|
case k
|
41
41
|
when /^q(\d+)_p(\d+)$/
|
42
|
-
# just put query & rank in the beginning, because $1, $2 return zero after the split
|
43
42
|
psm_result.query = $1.to_i
|
44
43
|
psm_result.rank = $2.to_i
|
45
|
-
#main result, must split value
|
46
44
|
psm_vals, prots = v.split(";")
|
47
45
|
psm_vals = psm_vals.split(',')
|
48
46
|
psm_result.missed_cleavages= psm_vals[0].to_i
|
data/lib/mascot/dat/version.rb
CHANGED
data/lib/mascot/dat.rb
CHANGED
@@ -24,6 +24,7 @@ module Mascot
|
|
24
24
|
class DAT
|
25
25
|
attr_reader :idx
|
26
26
|
attr_reader :boundary
|
27
|
+
attr_reader :boundary_string
|
27
28
|
attr_reader :dat_file
|
28
29
|
SECTIONS = ["summary", "decoy_summary", "et_summary", "parameters",
|
29
30
|
"peptides", "decoy_peptides", "et_peptides",
|
@@ -34,6 +35,7 @@ module Mascot
|
|
34
35
|
@dat_file = File.open(dat_file_path)
|
35
36
|
@idx = {}
|
36
37
|
@boundary = nil
|
38
|
+
@boundary_string = nil
|
37
39
|
@cache_index = cache_index
|
38
40
|
parse_index
|
39
41
|
end
|
@@ -53,11 +55,10 @@ module Mascot
|
|
53
55
|
def query(n)
|
54
56
|
# search index for this
|
55
57
|
bytepos = @idx["query#{n}".to_sym]
|
56
|
-
@dat_file.pos = bytepos
|
57
|
-
@dat_file.readline # ADDED
|
58
|
+
@dat_file.pos = bytepos + @boundary_string.length
|
58
59
|
att_rx = /(\w+)\=(.+)/
|
59
60
|
q = {}
|
60
|
-
each do |l|
|
61
|
+
@dat_file.each do |l|
|
61
62
|
l.chomp
|
62
63
|
case l
|
63
64
|
when att_rx
|
@@ -65,9 +66,7 @@ module Mascot
|
|
65
66
|
case k
|
66
67
|
when "title"
|
67
68
|
q[k.to_sym] = URI.decode(v)
|
68
|
-
|
69
|
-
# q[k.to_sym] = v.split(",").collect {|e| e.split(":").collect {|ee| ee.to_f}}
|
70
|
-
when "Ions1" # CHANGED POSITION TO GET IN THE BLOCK AND DO THE PARSE TO GET THE ARRAY
|
69
|
+
when "Ions1"
|
71
70
|
q[:peaks] = parse_mzi(v)
|
72
71
|
else
|
73
72
|
q[k.to_sym] = v
|
@@ -82,15 +81,7 @@ module Mascot
|
|
82
81
|
end
|
83
82
|
|
84
83
|
alias_method :spectrum, :query
|
85
|
-
|
86
|
-
mzi = [[],[]]
|
87
|
-
ions_str.split(",").collect do |mzpair|
|
88
|
-
tmp = mzpair.split(":").collect {|e| e.to_f}
|
89
|
-
mzi[0] << tmp[0]
|
90
|
-
mzi[1] << tmp[1]
|
91
|
-
end
|
92
|
-
mzi
|
93
|
-
end
|
84
|
+
|
94
85
|
|
95
86
|
# Go to a section of the Mascot DAT file
|
96
87
|
def goto(key)
|
@@ -104,7 +95,7 @@ module Mascot
|
|
104
95
|
# Read a section of the DAT file into memory. THIS IS NOT
|
105
96
|
# RECOMMENDED UNLESS YOU KNOW WHAT YOU ARE DOING.
|
106
97
|
#
|
107
|
-
# @param [String or Symbol] The section name
|
98
|
+
# @param key [String or Symbol] The section name
|
108
99
|
# @return [String] The section of the DAT file as a String. The section
|
109
100
|
# includes the MIME boundary and content type
|
110
101
|
# definition lines.
|
@@ -165,6 +156,7 @@ module Mascot
|
|
165
156
|
idxf = File.open(idxfn)
|
166
157
|
@idx = ::Marshal.load(idxf.read)
|
167
158
|
@boundary = @idx[:boundary]
|
159
|
+
@boundary_string = @idx[:boundary_string]
|
168
160
|
idxf.close
|
169
161
|
else
|
170
162
|
create_index()
|
@@ -178,9 +170,10 @@ module Mascot
|
|
178
170
|
# MIME header line, to parse out boundary
|
179
171
|
@dat_file.readline
|
180
172
|
@dat_file.readline =~/boundary=(\w+)$/
|
181
|
-
boundary_string = "--#{$1}"
|
182
|
-
@boundary = /#{boundary_string}/
|
173
|
+
@boundary_string = "--#{$1}"
|
174
|
+
@boundary = /#{@boundary_string}/
|
183
175
|
@idx[:boundary] = @boundary
|
176
|
+
@idx[:boundary_string] = @boundary_string
|
184
177
|
@dat_file.grep(@boundary) do |l|
|
185
178
|
break if @dat_file.eof?
|
186
179
|
section_position = @dat_file.pos - l.length
|
@@ -195,5 +188,16 @@ module Mascot
|
|
195
188
|
end
|
196
189
|
@dat_file.rewind
|
197
190
|
end
|
191
|
+
|
192
|
+
# Parse the ion string of mz/intensity peaks in Ions section
|
193
|
+
# Peaks are not ordered, so we must account for that.
|
194
|
+
def parse_mzi(ions_str)
|
195
|
+
mzi_tmp = []
|
196
|
+
ions_str.split(",").collect do |mzpair|
|
197
|
+
mzi_tmp << mzpair.split(":").collect {|e| e.to_f}
|
198
|
+
end
|
199
|
+
# now sort the mzi_tmp array by ascending m/z, and return the array
|
200
|
+
mzi_tmp.sort
|
201
|
+
end
|
198
202
|
end
|
199
203
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
[&[f59.044502f1.951e5[f76.396653f1.951e5[f88.063115f1.951e5[f92.727062f1.951e5[f111.734216f1.951e5[f114.091341f1.951e5[f122.082957f1.951e5[f138.586954f1.951e5[f160.757021f1.951e5[f167.097686f1.951e5[f171.105762f1.951e5[f175.118953f1.951e5[f182.620797f1.951e5[f190.112916f1.951e5[f206.443325f1.951e5[f223.795476f1.951e5[f227.175405f1.951e5[f240.631893f1.951e5[f244.138013f1.951e5[f256.155004f1.951e5[f276.166632f1.951e5[f284.665736f1.951e5[f309.16135f1.951e5[f333.188096f1.951e5[f335.189576f1.951e5[f364.234317f1.951e5[f365.703382f1.951e5[f480.256511f1.951e5[f511.302732f1.951e5[f568.324196f1.951e5[f617.315423f1.951e5[f669.371875f1.951e5[f730.399487f1.951e5
|
@@ -17,6 +17,7 @@ class TestMascotDatMasses < TestMascotDatHelper
|
|
17
17
|
def test_masses_delta1
|
18
18
|
# delta1=15.994915,Oxidation (M)
|
19
19
|
assert_equal(15.994915,@masses.masstable[:delta1])
|
20
|
+
assert_equal(15.994915,@masses.delta1)
|
20
21
|
end
|
21
22
|
def test_masses_var_mod_is_delta1
|
22
23
|
assert_equal(15.994915,@masses.deltas[0][0])
|
@@ -24,6 +25,7 @@ class TestMascotDatMasses < TestMascotDatHelper
|
|
24
25
|
end
|
25
26
|
def test_masses_FixedMod1_mass
|
26
27
|
assert_equal(57.021464,@masses.masstable[:FixedMod1])
|
28
|
+
assert_equal(57.021464,@masses.FixedMod1)
|
27
29
|
end
|
28
30
|
|
29
31
|
def test_masses_fixed_mod_is_FixedMod1
|
data/test/test_mascot-dat.rb
CHANGED
@@ -35,10 +35,17 @@ class TestMascotDat < TestMascotDatHelper
|
|
35
35
|
assert_equal(expected_section, @dat.read_section("enzyme"))
|
36
36
|
assert_equal(expected_section, @dat.read_section(:enzyme))
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
|
+
def test_read_section_masses
|
39
40
|
expected_section = File.read("test/fixtures/masses_section.txt")
|
40
41
|
assert_equal(expected_section, @dat.read_section("masses"))
|
41
42
|
assert_equal(expected_section, @dat.read_section(:masses))
|
42
43
|
end
|
43
44
|
|
45
|
+
def test_peaks
|
46
|
+
expected_peaks = Marshal.load(File.read("test/fixtures/query23_peaks.dmp"))
|
47
|
+
query23 = @dat.query(23)
|
48
|
+
assert_equal(expected_peaks,query23[:peaks])
|
49
|
+
end
|
50
|
+
|
44
51
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mascot-dat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70330121470000 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70330121470000
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: yard
|
27
|
-
requirement: &
|
27
|
+
requirement: &70330121469460 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70330121469460
|
36
36
|
description: Mascot DAT file format parser
|
37
37
|
email:
|
38
38
|
- angel@upenn.edu
|
@@ -61,6 +61,7 @@ files:
|
|
61
61
|
- test/fixtures/example.dat
|
62
62
|
- test/fixtures/header_section.txt
|
63
63
|
- test/fixtures/masses_section.txt
|
64
|
+
- test/fixtures/query23_peaks.dmp
|
64
65
|
- test/test_mascot-dat-enzyme.rb
|
65
66
|
- test/test_mascot-dat-header_info.rb
|
66
67
|
- test/test_mascot-dat-helper.rb
|
@@ -85,7 +86,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
86
|
version: '0'
|
86
87
|
segments:
|
87
88
|
- 0
|
88
|
-
hash: -
|
89
|
+
hash: -998556290879411337
|
89
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
91
|
none: false
|
91
92
|
requirements:
|
@@ -94,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
95
|
version: '0'
|
95
96
|
segments:
|
96
97
|
- 0
|
97
|
-
hash: -
|
98
|
+
hash: -998556290879411337
|
98
99
|
requirements: []
|
99
100
|
rubyforge_project:
|
100
101
|
rubygems_version: 1.8.11
|
@@ -106,6 +107,7 @@ test_files:
|
|
106
107
|
- test/fixtures/example.dat
|
107
108
|
- test/fixtures/header_section.txt
|
108
109
|
- test/fixtures/masses_section.txt
|
110
|
+
- test/fixtures/query23_peaks.dmp
|
109
111
|
- test/test_mascot-dat-enzyme.rb
|
110
112
|
- test/test_mascot-dat-header_info.rb
|
111
113
|
- test/test_mascot-dat-helper.rb
|