mascot-dat 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  *~
2
+ .DS_Store
2
3
  pkg/*
3
4
  *.gem
4
5
  .bundle
@@ -6,7 +6,7 @@ module Mascot
6
6
  # access this section as one big chunk in memory. It is often quite large and
7
7
  # needs to be accessed using Enumerable methods.
8
8
  #
9
- # From the Mascot documentation, results are CSV list with the following information
9
+ # From the Mascot documentation, the following represents a reasonably complete PSM
10
10
  # q1_p1_db=01 # two-digit integer of the search DB index, zero-padded.
11
11
  # q1_p1=missed cleavages, (–1 indicates no match)
12
12
  # peptide Mr,
@@ -76,11 +76,17 @@ module Mascot
76
76
  @file.pos = @byteoffset + @boundary_line.length
77
77
  end
78
78
 
79
+ # Return a specific {Mascot::DAT::PSM} identified for query <code>q</code> and peptide number <code>p</code>
80
+ # @param q Fixnum
81
+ # @param p Fixnum
82
+ # @return Mascot::DAT::PSM
79
83
  def psm q,p
80
84
  @file.pos = @psmidx[q][p]
81
85
  next_psm
82
86
  end
83
87
 
88
+ # Returns the next {Mascot::DAT::PSM} from the DAT file. If there is no other PSM, then it returns nil.
89
+ # @return Mascot::DAT::PSM
84
90
  def next_psm
85
91
  return nil if @file.pos >= @endbytepos
86
92
  # get the initial values for query & rank
@@ -106,7 +112,10 @@ module Mascot
106
112
  Mascot::DAT::PSM.parse(tmp)
107
113
  end
108
114
 
115
+ # Iterate through all of the {Mascot::DAT::PSM} entries in the DAT file.
116
+ # @return Enumerator
109
117
  def each
118
+ @file.pos = @byteoffset
110
119
  while @file.pos < @endbytepos
111
120
  psm = next_psm()
112
121
  next if psm.nil?
@@ -34,8 +34,9 @@ module Mascot
34
34
  end
35
35
  def self.parse psm_arr
36
36
  psm_result = self.new()
37
-
38
37
  psm_arr.each do |l|
38
+ next unless l =~ /^q/
39
+
39
40
  k,v = l.split "="
40
41
  case k
41
42
  when /^q(\d+)_p(\d+)$/
@@ -69,6 +70,7 @@ module Mascot
69
70
  psm_result.terms = v.split(":").collect {|t| t.split(",") }
70
71
  else
71
72
  # returns the smaller key
73
+ puts "****#{k}***"
72
74
  k_sym = k.slice(/q\d+_p\d+_?(.+)/,1).to_sym
73
75
  psm_result.attrs[k_sym] = v
74
76
  end
@@ -0,0 +1,118 @@
1
module Mascot
  class DAT

    # A class to represent mass spectrum query objects in Mascot DAT files.
    # Here is an example:
    #
    #   --gc0p4Jq0M2Yt08jU534c0p
    #   Content-Type: application/x-Mascot; name="query3"
    #
    #   title=253%2e131203405971_503
    #   rtinseconds=503
    #   index=5
    #   charge=2+
    #   mass_min=88.063115
    #   mass_max=392.171066
    #   int_min=6.064e+05
    #   int_max=6.064e+05
    #   num_vals=10
    #   num_used1=-1
    #   Ions1=88.063115:6.064e+05,196.589171:6.064e+05,331.143454:6.064e+05,392.171066:6.064e+05,114.570773:6.064e+05,228.134269:6.064e+05,139.567707:6.064e+05,278.128138:6.064e+05,166.075365:6.064e+05,175.118953:6.064e+05
    #
    # Things to note are:
    #
    # * the spectrum title is percent-encoded in the DAT file
    # * the m/z and intensity values are given as pairs of values
    # * the m/z and intensity values are not in increasing values of m/z
    #
    # This parser accounts for these in the attributes like so:
    #
    # * spectrum title is percent-decoded
    # * the pairs of m/z and intensity are accessible via the {#peaks} method
    # * the {#peaks} are ordered in accordance to increasing m/z
    # * there are {#mz} and {#intensity} methods to get the individual array of values for each
    #
    class Query
      # The name of the query in Mascot DAT file, e.g. the MIME section header
      attr_reader :name
      # The spectrum title from the source mass spectrum file (percent-decoded)
      attr_reader :title
      # Integer value of the +index+ field (its meaning is not documented here)
      attr_reader :index
      # Retention time in seconds
      attr_reader :rtinseconds
      # Charge state of the parent MS1 ion, kept as the raw String (e.g. "2+")
      attr_reader :charge
      # The minimum m/z of the values
      attr_reader :mass_min
      # The maximum m/z of the values
      attr_reader :mass_max
      # The minimum intensity of the values
      attr_reader :int_min
      # The maximum intensity of the values
      attr_reader :int_max
      # The number of peaks
      attr_reader :num_vals
      # Integer value of the +num_used1+ field (its meaning is not documented here)
      attr_reader :num_used1
      # An Array of [m/z, intensity] tuples, ordered by increasing m/z values.
      # nil when the section has no Ions1 line.
      attr_reader :peaks
      # An Array of m/z values, ordered by increasing m/z.
      # nil when the section has no Ions1 line.
      attr_reader :mz
      # An Array of intensity values, ordered by the corresponding m/z value in the {#mz} Array.
      # nil when the section has no Ions1 line.
      attr_reader :intensity

      # All other attributes from DAT query sections not covered above,
      # as a Hash of Symbol key => raw String value
      attr_reader :attributes

      # Parse the text of one query section.
      #
      # Every line containing a <code>key=value</code> pair is examined; lines
      # without one (boundary marker, blank lines) are ignored. The MIME
      # header line contributes the <code>name</code> pair.
      #
      # @param query_str [String] the raw text of a query section
      def initialize(query_str)
        # Must exist before the loop: the catch-all branch below stores into it.
        @attributes = {}
        query_str.split(/\n/).each do |line|
          next unless line =~ /(\w+)\=(.+)$/
          key = Regexp.last_match(1)
          value = Regexp.last_match(2)
          case key
          when "name"
            @name = value.gsub('"', '')
          when "title"
            @title = percent_decode(value)
          when "index"
            @index = value.to_i
          when "rtinseconds"
            @rtinseconds = value.to_i
          when "charge"
            @charge = value
          when "mass_min"
            @mass_min = value.to_f
          when "mass_max"
            @mass_max = value.to_f
          when "int_min"
            @int_min = value.to_f
          when "int_max"
            @int_max = value.to_f
          when "num_vals"
            @num_vals = value.to_i
          when "num_used1"
            @num_used1 = value.to_i
          when "Ions1"
            parse_ions1(value)
          else
            @attributes[key.to_sym] = value
          end
        end
      end

      private

      # Replace every %XX escape with the byte it encodes. Unlike form
      # decoding, a literal "+" is left untouched. This stands in for
      # URI.decode, which no longer exists in current Ruby releases.
      #
      # @param str [String] percent-encoded text
      # @return [String] decoded text in the same encoding as +str+
      def percent_decode(str)
        str.gsub(/%([0-9a-fA-F]{2})/) do
          [Regexp.last_match(1)].pack("H2").force_encoding(str.encoding)
        end
      end

      # Populate {#peaks}, {#mz} and {#intensity} from the value of an Ions1 line.
      #
      # @param ions1 [String] comma separated list of "mz:intensity" pairs
      def parse_ions1(ions1)
        @peaks = ions1.split(",").map do |mzpair|
          mzpair.split(":").map { |e| e.to_f }
        end
        # the DAT file does not list peaks in m/z order, so sort the pairs
        # (Array comparison orders by m/z first, then by intensity)
        @peaks.sort!
        # once sorted by increasing m/z, populate the individual arrays
        @mz = @peaks.map { |p| p[0] }
        @intensity = @peaks.map { |p| p[1] }
      end
    end
  end
end
118
+
@@ -1,6 +1,6 @@
1
1
  module Mascot
2
2
  class DAT
3
- VERSION = "0.1.3"
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
6
6
 
data/lib/mascot/dat.rb CHANGED
@@ -6,6 +6,7 @@ require 'mascot/dat/parameters'
6
6
  require 'mascot/dat/peptides'
7
7
  require 'mascot/dat/proteins'
8
8
  require 'mascot/dat/psm'
9
+ require "mascot/dat/query"
9
10
  require 'mascot/dat/search_databases'
10
11
  require 'mascot/dat/summary'
11
12
  require 'mascot/dat/version'
@@ -48,41 +49,15 @@ module Mascot
48
49
  @dat_file.close
49
50
  end
50
51
 
51
- # Read in the query spectrum from the DAT file
52
+ # Return a specific query spectrum from the DAT file
52
53
  #
53
54
  # @param n The query spectrum numerical index
54
- # @return Hash of the spectrum. The hash has
55
+ # @return {Mascot::DAT::Query}
55
56
  def query(n)
56
- # search index for this
57
- bytepos = @idx["query#{n}".to_sym]
58
- @dat_file.pos = bytepos + @boundary_string.length
59
- att_rx = /(\w+)\=(.+)/
60
- q = {}
61
- @dat_file.each do |l|
62
- l.chomp
63
- case l
64
- when att_rx
65
- k,v = $1,$2
66
- case k
67
- when "title"
68
- q[k.to_sym] = URI.decode(v)
69
- when "Ions1"
70
- q[:peaks] = parse_mzi(v)
71
- else
72
- q[k.to_sym] = v
73
- end
74
- when @boundary
75
- break
76
- else
77
- next
78
- end
79
- end
80
- q
57
+ return Mascot::DAT::Query.new(self.read_section(:"query#{n}"))
81
58
  end
82
-
83
59
  alias_method :spectrum, :query
84
60
 
85
-
86
61
  # Go to a section of the Mascot DAT file
87
62
  def goto(key)
88
63
  if @idx.has_key?(key.to_sym)
@@ -189,15 +164,6 @@ module Mascot
189
164
  @dat_file.rewind
190
165
  end
191
166
 
192
- # Parse the ion string of mz/intensity peaks in Ions section
193
- # Peaks are not ordered, so we must account for that.
194
- def parse_mzi(ions_str)
195
- mzi_tmp = []
196
- ions_str.split(",").collect do |mzpair|
197
- mzi_tmp << mzpair.split(":").collect {|e| e.to_f}
198
- end
199
- # now sort the mz_tmp array as ascending m/z, and return the array
200
- mzi_tmp.sort
201
- end
167
+
202
168
  end
203
169
  end
@@ -0,0 +1,65 @@
1
+ require 'test_mascot-dat-helper'
2
+
3
# Tests for the object returned by <code>@dat.query(23)</code>.
# NOTE(review): @dat is presumably set up by TestMascotDatHelper#setup from
# the example DAT fixture -- confirm against the helper.
class TestMascotDatQuery < TestMascotDatHelper
  # Fetch query 23 once per test; every assertion below inspects it.
  def setup
    super
    @query = @dat.query(23)
  end

  def test_name
    assert_equal("query23", @query.name)
  end

  def test_title
    assert_equal("281.832701459371_513", @query.title)
  end

  def test_rtinseconds
    assert_equal(513, @query.rtinseconds)
  end

  def test_index
    assert_equal(30, @query.index)
  end

  def test_charge
    assert_equal("3+", @query.charge)
  end

  def test_mass_min
    assert_equal(59.044502, @query.mass_min)
  end

  def test_mass_max
    assert_equal(730.399487, @query.mass_max)
  end

  def test_int_min
    assert_equal(1.951e+05, @query.int_min)
  end

  def test_int_max
    assert_equal(1.951e+05, @query.int_max)
  end

  def test_num_vals
    assert_equal(33, @query.num_vals)
  end

  def test_num_used1
    assert_equal(-1, @query.num_used1)
  end

  # The expected peaks are stored as a Marshal dump, which is binary data:
  # read it in binary mode so no newline or encoding translation is applied.
  def test_peaks
    expected_peaks = Marshal.load(File.binread("test/fixtures/query23_peaks.dmp"))
    assert_equal(expected_peaks, @query.peaks)
  end

  def test_mz_array
    mz_expected = [59.044502, 76.396653, 88.063115, 92.727062, 111.734216,
      114.091341, 122.082957, 138.586954, 160.757021, 167.097686, 171.105762,
      175.118953, 182.620797, 190.112916, 206.443325, 223.795476, 227.175405,
      240.631893, 244.138013, 256.155004, 276.166632, 284.665736, 309.16135,
      333.188096, 335.189576, 364.234317, 365.703382, 480.256511, 511.302732,
      568.324196, 617.315423, 669.371875, 730.399487]
    assert_equal(mz_expected, @query.mz)
  end

  def test_intensity_array
    intensity_expected = [195100.0, 195100.0, 195100.0, 195100.0, 195100.0,
      195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0,
      195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0,
      195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0,
      195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0, 195100.0]
    assert_equal(intensity_expected, @query.intensity)
  end
end
@@ -9,6 +9,10 @@ class TestMascotDat < TestMascotDatHelper
9
9
  assert_equal(Regexp.new("--gc0p4Jq0M2Yt08jU534c0p"), @dat.boundary)
10
10
  end
11
11
 
12
+ def test_dat_boundary_string
13
+ assert_equal("--gc0p4Jq0M2Yt08jU534c0p", @dat.boundary_string)
14
+ end
15
+
12
16
  def test_dat_byteoffset_index_is_created
13
17
  File.unlink(@dat.dat_file.path + ".idx") if File.exists?(@dat.dat_file.path + ".idx")
14
18
  @dat = Mascot::DAT.open("test/fixtures/example.dat")
@@ -42,10 +46,4 @@ class TestMascotDat < TestMascotDatHelper
42
46
  assert_equal(expected_section, @dat.read_section(:masses))
43
47
  end
44
48
 
45
- def test_peaks
46
- expected_peaks = Marshal.load(File.read("test/fixtures/query23_peaks.dmp"))
47
- query23 = @dat.query(23)
48
- assert_equal(expected_peaks,query23[:peaks])
49
- end
50
-
51
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mascot-dat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-03 00:00:00.000000000 Z
12
+ date: 2012-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &70330121470000 !ruby/object:Gem::Requirement
16
+ requirement: &70340237935320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70330121470000
24
+ version_requirements: *70340237935320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: yard
27
- requirement: &70330121469460 !ruby/object:Gem::Requirement
27
+ requirement: &70340237934880 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70330121469460
35
+ version_requirements: *70340237934880
36
36
  description: Mascot DAT file format parser
37
37
  email:
38
38
  - angel@upenn.edu
@@ -53,6 +53,7 @@ files:
53
53
  - lib/mascot/dat/peptides.rb
54
54
  - lib/mascot/dat/proteins.rb
55
55
  - lib/mascot/dat/psm.rb
56
+ - lib/mascot/dat/query.rb
56
57
  - lib/mascot/dat/search_databases.rb
57
58
  - lib/mascot/dat/summary.rb
58
59
  - lib/mascot/dat/version.rb
@@ -69,6 +70,7 @@ files:
69
70
  - test/test_mascot-dat-parameters.rb
70
71
  - test/test_mascot-dat-peptides.rb
71
72
  - test/test_mascot-dat-proteins.rb
73
+ - test/test_mascot-dat-query.rb
72
74
  - test/test_mascot-dat-search_databases.rb
73
75
  - test/test_mascot-dat-summary.rb
74
76
  - test/test_mascot-dat.rb
@@ -86,7 +88,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
86
88
  version: '0'
87
89
  segments:
88
90
  - 0
89
- hash: -998556290879411337
91
+ hash: -3741896018441832167
90
92
  required_rubygems_version: !ruby/object:Gem::Requirement
91
93
  none: false
92
94
  requirements:
@@ -95,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
97
  version: '0'
96
98
  segments:
97
99
  - 0
98
- hash: -998556290879411337
100
+ hash: -3741896018441832167
99
101
  requirements: []
100
102
  rubyforge_project:
101
103
  rubygems_version: 1.8.11
@@ -115,6 +117,7 @@ test_files:
115
117
  - test/test_mascot-dat-parameters.rb
116
118
  - test/test_mascot-dat-peptides.rb
117
119
  - test/test_mascot-dat-proteins.rb
120
+ - test/test_mascot-dat-query.rb
118
121
  - test/test_mascot-dat-search_databases.rb
119
122
  - test/test_mascot-dat-summary.rb
120
123
  - test/test_mascot-dat.rb