mascot-dat 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/Rakefile +3 -4
- data/lib/mascot/dat/masses.rb +52 -12
- data/lib/mascot/dat/psm.rb +0 -2
- data/lib/mascot/dat/version.rb +1 -1
- data/lib/mascot/dat.rb +22 -18
- data/test/fixtures/query23_peaks.dmp +1 -0
- data/test/test_mascot-dat-helper.rb +3 -0
- data/test/test_mascot-dat-masses.rb +2 -0
- data/test/test_mascot-dat.rb +8 -1
- metadata +10 -8
data/.gitignore
CHANGED
data/Rakefile
CHANGED
@@ -8,11 +8,10 @@ Rake::TestTask.new do |t|
|
|
8
8
|
t.verbose = true
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
11
|
# documentation
|
13
12
|
require 'yard'
|
14
13
|
require 'yard/rake/yardoc_task'
|
15
14
|
YARD::Rake::YardocTask.new do |t|
|
16
|
-
t.files = ['lib/**/*.rb']
|
17
|
-
t.options =
|
18
|
-
end
|
15
|
+
t.files = ['lib/**/*.rb', '-' , "README.rdoc","LICENSE"]
|
16
|
+
t.options = ["-r","README.rdoc"]
|
17
|
+
end
|
data/lib/mascot/dat/masses.rb
CHANGED
@@ -1,23 +1,60 @@
|
|
1
1
|
module Mascot
|
2
2
|
class DAT
|
3
|
+
|
4
|
+
# A Hash of the mass table section. See the {#masstable masstable} instance method for details.
|
3
5
|
class Masses
|
6
|
+
# The main table of masses. Given the following examples from a DAT file:
|
7
|
+
#
|
8
|
+
# W=186.079313
|
9
|
+
# X=111.000000
|
10
|
+
# Y=163.063329
|
11
|
+
# Z=128.550590
|
12
|
+
# Hydrogen=1.007825
|
13
|
+
# Carbon=12.000000
|
14
|
+
#
|
15
|
+
# You can access the value for Hydrogen as:
|
16
|
+
# mydat.masses.masstable[:Hydrogen] # => 1.007825
|
17
|
+
# or
|
18
|
+
# mydat.masses.m[:Hydrogen] # => 1.007825
|
19
|
+
# or
|
20
|
+
# mydat.masses.Hydrogen # => 1.007825
|
21
|
+
attr_reader :masstable
|
22
|
+
# def masstable
|
23
|
+
# @masstable
|
24
|
+
# end
|
25
|
+
alias_method :m, :masstable
|
4
26
|
|
5
|
-
#
|
27
|
+
# A subset of the mass table defining the variable modifications. For
|
28
|
+
# example, given the following delta in a DAT file:
|
29
|
+
#
|
30
|
+
# delta1=15.994915,Oxidation (M)
|
31
|
+
#
|
32
|
+
# Then the following gets defined:
|
33
|
+
#
|
34
|
+
# @deltas = [[15.994915,"Oxidation (M)"], ... ]
|
35
|
+
#
|
6
36
|
attr_reader :deltas
|
37
|
+
alias_method :mods, :deltas
|
38
|
+
alias_method :d, :deltas
|
7
39
|
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
# A Hash of the mass table. E.g.
|
40
|
+
# A subset of the mass table defining the fixed modifications. For
|
41
|
+
# example:
|
12
42
|
#
|
13
|
-
#
|
43
|
+
# FixedMod1=57.021464,Carbamidomethyl (C)
|
44
|
+
# FixedModResidues1=C
|
14
45
|
#
|
15
|
-
|
46
|
+
# Then the following gets defined:
|
47
|
+
#
|
48
|
+
# @fixed_modifications = [[57.021464, "Carbamidomethyl (C)", "C"], ...]
|
49
|
+
#
|
50
|
+
attr_reader :fixed_modifications
|
51
|
+
alias_method :fixed_mods, :fixed_modifications
|
52
|
+
alias_method :f, :fixed_modifications
|
16
53
|
|
17
54
|
def initialize masses_section
|
18
55
|
@masstable = {}
|
19
56
|
@deltas = []
|
20
|
-
@
|
57
|
+
@fixed_modifications = []
|
21
58
|
|
22
59
|
masses_section.split("\n").each do |l|
|
23
60
|
k,v = l.split("=")
|
@@ -32,16 +69,19 @@ module Mascot
|
|
32
69
|
idx = $2.to_i - 1
|
33
70
|
if $1.empty?
|
34
71
|
# new fixed mod record
|
35
|
-
@
|
36
|
-
@
|
37
|
-
@masstable[k.to_sym] = @
|
72
|
+
@fixed_modifications[idx] = v.split(",")
|
73
|
+
@fixed_modifications[idx][0] = @fixed_modifications[idx][0].to_f
|
74
|
+
@masstable[k.to_sym] = @fixed_modifications[idx][0]
|
38
75
|
else
|
39
76
|
# append the modified residue to the array
|
40
|
-
@
|
77
|
+
@fixed_modifications[idx] << v
|
41
78
|
end
|
42
79
|
else
|
43
80
|
@masstable[k.to_sym] = v.to_f
|
44
81
|
end
|
82
|
+
@masstable.keys.each do |m|
|
83
|
+
self.class.send(:define_method,m, lambda { @masstable[m] })
|
84
|
+
end
|
45
85
|
end
|
46
86
|
end
|
47
87
|
end
|
data/lib/mascot/dat/psm.rb
CHANGED
@@ -39,10 +39,8 @@ module Mascot
|
|
39
39
|
k,v = l.split "="
|
40
40
|
case k
|
41
41
|
when /^q(\d+)_p(\d+)$/
|
42
|
-
# just put query & rank in the beginning, because $1, $2 return zero after the split
|
43
42
|
psm_result.query = $1.to_i
|
44
43
|
psm_result.rank = $2.to_i
|
45
|
-
#main result, must split value
|
46
44
|
psm_vals, prots = v.split(";")
|
47
45
|
psm_vals = psm_vals.split(',')
|
48
46
|
psm_result.missed_cleavages= psm_vals[0].to_i
|
data/lib/mascot/dat/version.rb
CHANGED
data/lib/mascot/dat.rb
CHANGED
@@ -24,6 +24,7 @@ module Mascot
|
|
24
24
|
class DAT
|
25
25
|
attr_reader :idx
|
26
26
|
attr_reader :boundary
|
27
|
+
attr_reader :boundary_string
|
27
28
|
attr_reader :dat_file
|
28
29
|
SECTIONS = ["summary", "decoy_summary", "et_summary", "parameters",
|
29
30
|
"peptides", "decoy_peptides", "et_peptides",
|
@@ -34,6 +35,7 @@ module Mascot
|
|
34
35
|
@dat_file = File.open(dat_file_path)
|
35
36
|
@idx = {}
|
36
37
|
@boundary = nil
|
38
|
+
@boundary_string = nil
|
37
39
|
@cache_index = cache_index
|
38
40
|
parse_index
|
39
41
|
end
|
@@ -53,11 +55,10 @@ module Mascot
|
|
53
55
|
def query(n)
|
54
56
|
# search index for this
|
55
57
|
bytepos = @idx["query#{n}".to_sym]
|
56
|
-
@dat_file.pos = bytepos
|
57
|
-
@dat_file.readline # ADDED
|
58
|
+
@dat_file.pos = bytepos + @boundary_string.length
|
58
59
|
att_rx = /(\w+)\=(.+)/
|
59
60
|
q = {}
|
60
|
-
each do |l|
|
61
|
+
@dat_file.each do |l|
|
61
62
|
l.chomp
|
62
63
|
case l
|
63
64
|
when att_rx
|
@@ -65,9 +66,7 @@ module Mascot
|
|
65
66
|
case k
|
66
67
|
when "title"
|
67
68
|
q[k.to_sym] = URI.decode(v)
|
68
|
-
|
69
|
-
# q[k.to_sym] = v.split(",").collect {|e| e.split(":").collect {|ee| ee.to_f}}
|
70
|
-
when "Ions1" # CHANGED POSITION TO GET IN THE BLOCK AND DO THE PARSE TO GET THE ARRAY
|
69
|
+
when "Ions1"
|
71
70
|
q[:peaks] = parse_mzi(v)
|
72
71
|
else
|
73
72
|
q[k.to_sym] = v
|
@@ -82,15 +81,7 @@ module Mascot
|
|
82
81
|
end
|
83
82
|
|
84
83
|
alias_method :spectrum, :query
|
85
|
-
|
86
|
-
mzi = [[],[]]
|
87
|
-
ions_str.split(",").collect do |mzpair|
|
88
|
-
tmp = mzpair.split(":").collect {|e| e.to_f}
|
89
|
-
mzi[0] << tmp[0]
|
90
|
-
mzi[1] << tmp[1]
|
91
|
-
end
|
92
|
-
mzi
|
93
|
-
end
|
84
|
+
|
94
85
|
|
95
86
|
# Go to a section of the Mascot DAT file
|
96
87
|
def goto(key)
|
@@ -104,7 +95,7 @@ module Mascot
|
|
104
95
|
# Read a section of the DAT file into memory. THIS IS NOT
|
105
96
|
# RECOMMENDED UNLESS YOU KNOW WHAT YOU ARE DOING.
|
106
97
|
#
|
107
|
-
# @param [String or Symbol] The section name
|
98
|
+
# @param key [String or Symbol] The section name
|
108
99
|
# @return [String] The section of the DAT file as a String. The section
|
109
100
|
# includes the MIME boundary and content type
|
110
101
|
# definition lines.
|
@@ -165,6 +156,7 @@ module Mascot
|
|
165
156
|
idxf = File.open(idxfn)
|
166
157
|
@idx = ::Marshal.load(idxf.read)
|
167
158
|
@boundary = @idx[:boundary]
|
159
|
+
@boundary_string = @idx[:boundary_string]
|
168
160
|
idxf.close
|
169
161
|
else
|
170
162
|
create_index()
|
@@ -178,9 +170,10 @@ module Mascot
|
|
178
170
|
# MIME header line, to parse out boundary
|
179
171
|
@dat_file.readline
|
180
172
|
@dat_file.readline =~/boundary=(\w+)$/
|
181
|
-
boundary_string = "--#{$1}"
|
182
|
-
@boundary = /#{boundary_string}/
|
173
|
+
@boundary_string = "--#{$1}"
|
174
|
+
@boundary = /#{@boundary_string}/
|
183
175
|
@idx[:boundary] = @boundary
|
176
|
+
@idx[:boundary_string] = @boundary_string
|
184
177
|
@dat_file.grep(@boundary) do |l|
|
185
178
|
break if @dat_file.eof?
|
186
179
|
section_position = @dat_file.pos - l.length
|
@@ -195,5 +188,16 @@ module Mascot
|
|
195
188
|
end
|
196
189
|
@dat_file.rewind
|
197
190
|
end
|
191
|
+
|
192
|
+
# Parse the ion string of mz/intensity peaks in Ions section
|
193
|
+
# Peaks are not ordered, so we must account for that.
|
194
|
+
def parse_mzi(ions_str)
|
195
|
+
mzi_tmp = []
|
196
|
+
ions_str.split(",").collect do |mzpair|
|
197
|
+
mzi_tmp << mzpair.split(":").collect {|e| e.to_f}
|
198
|
+
end
|
199
|
+
# now sort the mzi_tmp array as ascending m/z, and return the array
|
200
|
+
mzi_tmp.sort
|
201
|
+
end
|
198
202
|
end
|
199
203
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
[&[f59.044502f1.951e5[f76.396653f1.951e5[f88.063115f1.951e5[f92.727062f1.951e5[f111.734216f1.951e5[f114.091341f1.951e5[f122.082957f1.951e5[f138.586954f1.951e5[f160.757021f1.951e5[f167.097686f1.951e5[f171.105762f1.951e5[f175.118953f1.951e5[f182.620797f1.951e5[f190.112916f1.951e5[f206.443325f1.951e5[f223.795476f1.951e5[f227.175405f1.951e5[f240.631893f1.951e5[f244.138013f1.951e5[f256.155004f1.951e5[f276.166632f1.951e5[f284.665736f1.951e5[f309.16135f1.951e5[f333.188096f1.951e5[f335.189576f1.951e5[f364.234317f1.951e5[f365.703382f1.951e5[f480.256511f1.951e5[f511.302732f1.951e5[f568.324196f1.951e5[f617.315423f1.951e5[f669.371875f1.951e5[f730.399487f1.951e5
|
@@ -17,6 +17,7 @@ class TestMascotDatMasses < TestMascotDatHelper
|
|
17
17
|
def test_masses_delta1
|
18
18
|
# delta1=15.994915,Oxidation (M)
|
19
19
|
assert_equal(15.994915,@masses.masstable[:delta1])
|
20
|
+
assert_equal(15.994915,@masses.delta1)
|
20
21
|
end
|
21
22
|
def test_masses_var_mod_is_delta1
|
22
23
|
assert_equal(15.994915,@masses.deltas[0][0])
|
@@ -24,6 +25,7 @@ class TestMascotDatMasses < TestMascotDatHelper
|
|
24
25
|
end
|
25
26
|
def test_masses_FixedMod1_mass
|
26
27
|
assert_equal(57.021464,@masses.masstable[:FixedMod1])
|
28
|
+
assert_equal(57.021464,@masses.FixedMod1)
|
27
29
|
end
|
28
30
|
|
29
31
|
def test_masses_fixed_mod_is_FixedMod1
|
data/test/test_mascot-dat.rb
CHANGED
@@ -35,10 +35,17 @@ class TestMascotDat < TestMascotDatHelper
|
|
35
35
|
assert_equal(expected_section, @dat.read_section("enzyme"))
|
36
36
|
assert_equal(expected_section, @dat.read_section(:enzyme))
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
|
+
def test_read_section_masses
|
39
40
|
expected_section = File.read("test/fixtures/masses_section.txt")
|
40
41
|
assert_equal(expected_section, @dat.read_section("masses"))
|
41
42
|
assert_equal(expected_section, @dat.read_section(:masses))
|
42
43
|
end
|
43
44
|
|
45
|
+
def test_peaks
|
46
|
+
expected_peaks = Marshal.load(File.read("test/fixtures/query23_peaks.dmp"))
|
47
|
+
query23 = @dat.query(23)
|
48
|
+
assert_equal(expected_peaks,query23[:peaks])
|
49
|
+
end
|
50
|
+
|
44
51
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mascot-dat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70330121470000 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70330121470000
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: yard
|
27
|
-
requirement: &
|
27
|
+
requirement: &70330121469460 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70330121469460
|
36
36
|
description: Mascot DAT file format parser
|
37
37
|
email:
|
38
38
|
- angel@upenn.edu
|
@@ -61,6 +61,7 @@ files:
|
|
61
61
|
- test/fixtures/example.dat
|
62
62
|
- test/fixtures/header_section.txt
|
63
63
|
- test/fixtures/masses_section.txt
|
64
|
+
- test/fixtures/query23_peaks.dmp
|
64
65
|
- test/test_mascot-dat-enzyme.rb
|
65
66
|
- test/test_mascot-dat-header_info.rb
|
66
67
|
- test/test_mascot-dat-helper.rb
|
@@ -85,7 +86,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
86
|
version: '0'
|
86
87
|
segments:
|
87
88
|
- 0
|
88
|
-
hash: -
|
89
|
+
hash: -998556290879411337
|
89
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
91
|
none: false
|
91
92
|
requirements:
|
@@ -94,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
95
|
version: '0'
|
95
96
|
segments:
|
96
97
|
- 0
|
97
|
-
hash: -
|
98
|
+
hash: -998556290879411337
|
98
99
|
requirements: []
|
99
100
|
rubyforge_project:
|
100
101
|
rubygems_version: 1.8.11
|
@@ -106,6 +107,7 @@ test_files:
|
|
106
107
|
- test/fixtures/example.dat
|
107
108
|
- test/fixtures/header_section.txt
|
108
109
|
- test/fixtures/masses_section.txt
|
110
|
+
- test/fixtures/query23_peaks.dmp
|
109
111
|
- test/test_mascot-dat-enzyme.rb
|
110
112
|
- test/test_mascot-dat-header_info.rb
|
111
113
|
- test/test_mascot-dat-helper.rb
|