mascot-dat 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  *~
2
+ .DS_Store
2
3
  pkg/*
3
4
  *.gem
4
5
  .bundle
@@ -6,7 +6,7 @@ module Mascot
6
6
  # access this section as one big chunk in memory. It is often quite large and
7
7
  # needs to be accessed using Enumerable methods.
8
8
  #
9
- # From the Mascot documentation, results are CSV list with the following information
9
+ # From the Mascot documentation, the following represents a reasonably complete PSM
10
10
  # q1_p1_db=01 # two-digit integer of the search DB index, zero-padded.
11
11
  # q1_p1=missed cleavages, (–1 indicates no match)
12
12
  # peptide Mr,
@@ -76,11 +76,17 @@ module Mascot
76
76
  @file.pos = @byteoffset + @boundary_line.length
77
77
  end
78
78
 
79
+ # Return a specific {Mascot::DAT::PSM} identified for query <code>q</code> and peptide number <code>p</code>
80
+ # @param q Fixnum
81
+ # @param p Fixnum
82
+ # @return Mascot::DAT::PSM
79
83
  def psm q,p
80
84
  @file.pos = @psmidx[q][p]
81
85
  next_psm
82
86
  end
83
87
 
88
+ # Returns the next {Mascot::DAT::PSM} from the DAT file. If there is no other PSM, then it returns nil.
89
+ # @return Mascot::DAT::PSM
84
90
  def next_psm
85
91
  return nil if @file.pos >= @endbytepos
86
92
  # get the initial values for query & rank
@@ -106,7 +112,10 @@ module Mascot
106
112
  Mascot::DAT::PSM.parse(tmp)
107
113
  end
108
114
 
115
+ # Iterate through all of the {Mascot::DAT::PSM} entries in the DAT file.
116
+ # @return Enumerator
109
117
  def each
118
+ @file.pos = @byteoffset
110
119
  while @file.pos < @endbytepos
111
120
  psm = next_psm()
112
121
  next if psm.nil?
@@ -34,8 +34,9 @@ module Mascot
34
34
  end
35
35
  def self.parse psm_arr
36
36
  psm_result = self.new()
37
-
38
37
  psm_arr.each do |l|
38
+ next unless l =~ /^q/
39
+
39
40
  k,v = l.split "="
40
41
  case k
41
42
  when /^q(\d+)_p(\d+)$/
@@ -69,6 +70,7 @@ module Mascot
69
70
  psm_result.terms = v.split(":").collect {|t| t.split(",") }
70
71
  else
71
72
  # returns the smaller key
73
+ puts "****#{k}***"
72
74
  k_sym = k.slice(/q\d+_p\d+_?(.+)/,1).to_sym
73
75
  psm_result.attrs[k_sym] = v
74
76
  end
@@ -0,0 +1,118 @@
1
module Mascot
  class DAT

    # A class to represent mass spectrum query objects in Mascot DAT files.
    # Here is an example:
    #
    #     --gc0p4Jq0M2Yt08jU534c0p
    #     Content-Type: application/x-Mascot; name="query3"
    #
    #     title=253%2e131203405971_503
    #     rtinseconds=503
    #     index=5
    #     charge=2+
    #     mass_min=88.063115
    #     mass_max=392.171066
    #     int_min=6.064e+05
    #     int_max=6.064e+05
    #     num_vals=10
    #     num_used1=-1
    #     Ions1=88.063115:6.064e+05,196.589171:6.064e+05,331.143454:6.064e+05,392.171066:6.064e+05,114.570773:6.064e+05,228.134269:6.064e+05,139.567707:6.064e+05,278.128138:6.064e+05,166.075365:6.064e+05,175.118953:6.064e+05
    #
    # Things to note are:
    #
    # * the spectrum title is percent-encoded
    # * the m/z and intensity values are given as pairs of values
    # * the m/z and intensity values are not in increasing values of m/z
    #
    # This parser accounts for these in the attributes like so:
    #
    # * spectrum title is percent-decoded
    # * the pairs of m/z and intensity are accessible via the {#peaks} method
    # * the {#peaks} are ordered in accordance to increasing m/z
    # * there are {#mz} and {#intensity} methods to get the individual array of values for each
    #
    class Query
      # Matches a <code>key=value</code> pair anywhere in a line, so that the
      # <code>name="queryN"</code> part of the MIME Content-Type header is
      # picked up as well.
      ATTRIBUTE_RX = /(\w+)=(.+)$/

      # Fields whose values are stored via String#to_i
      INTEGER_FIELDS = %w[index rtinseconds num_vals num_used1].freeze
      # Fields whose values are stored via String#to_f
      FLOAT_FIELDS = %w[mass_min mass_max int_min int_max].freeze

      # The name of the query in Mascot DAT file, e.g. the MIME section header
      attr_reader :name
      # The spectrum title from the source mass spectrum file, percent-decoded
      attr_reader :title
      # Value of the <code>index</code> field as an Integer (its meaning is not documented here)
      attr_reader :index
      # Retention time in seconds
      attr_reader :rtinseconds
      # Charge state of the parent MS1 ion, kept as the raw String (e.g. "2+")
      attr_reader :charge
      # The minimum m/z of the values
      attr_reader :mass_min
      # The maximum m/z of the values
      attr_reader :mass_max
      # The minimum intensity of the values
      attr_reader :int_min
      # The maximum intensity of the values
      attr_reader :int_max
      # The number of peaks
      attr_reader :num_vals
      # Value of the <code>num_used1</code> field as an Integer (its meaning is not documented here)
      attr_reader :num_used1
      # An Array of [m/z, intensity] tuples, ordered by increasing m/z values
      attr_reader :peaks
      # An Array of m/z values, ordered by increasing m/z
      attr_reader :mz
      # An Array of intensity values, ordered by the corresponding m/z value in the {#mz} Array
      attr_reader :intensity

      # All other attributes from DAT query sections not covered above,
      # as a Hash of Symbol key to raw String value
      attr_reader :attributes

      # Parse the text of one query section.
      #
      # @param query_str [String] the raw section text, including the MIME
      #   boundary and Content-Type header lines
      def initialize(query_str)
        # Must exist before parsing: unknown keys are stored here, and leaving
        # it nil made any unrecognised key raise NoMethodError.
        @attributes = {}

        query_str.each_line do |raw_line|
          # chomp also strips "\r\n", so CRLF files do not leak "\r" into values
          matched = ATTRIBUTE_RX.match(raw_line.chomp)
          next unless matched

          store_field(matched[1], matched[2])
        end
      end

      private

      # Store a single key/value pair in the matching typed attribute, or in
      # {#attributes} when the key is not one of the known fields.
      def store_field(key, value)
        case key
        when "name"
          @name = value.delete('"')
        when "title"
          @title = percent_decode(value)
        when "charge"
          @charge = value
        when *INTEGER_FIELDS
          instance_variable_set("@#{key}", value.to_i)
        when *FLOAT_FIELDS
          instance_variable_set("@#{key}", value.to_f)
        when "Ions1"
          parse_ions1(value)
        else
          @attributes[key.to_sym] = value
        end
      end

      # Replace every <code>%XX</code> escape with the byte it encodes. Other
      # characters (including "+") are left untouched. This stands in for
      # URI.decode, which no longer exists in Ruby 3.0 and later.
      def percent_decode(str)
        # Work on a binary copy so escapes above 0x7F cannot trigger an
        # encoding compatibility error, then restore the original encoding.
        decoded = str.b.gsub(/%(\h\h)/) { Regexp.last_match(1).hex.chr }
        decoded.force_encoding(str.encoding)
      end

      # Populate {#peaks}, {#mz} and {#intensity} from the comma separated
      # list of <code>mz:intensity</code> pairs in an Ions1 value.
      def parse_ions1(ions1)
        @peaks = ions1.split(",").map { |pair| pair.split(":").map(&:to_f) }
        # pairs are not stored in m/z order, so sort ascending by m/z
        @peaks.sort!
        # once sorted by increasing m/z, populate the individual arrays
        @mz = @peaks.map { |peak| peak[0] }
        @intensity = @peaks.map { |peak| peak[1] }
      end
    end
  end
end
118
+
@@ -1,6 +1,6 @@
1
1
  module Mascot
2
2
  class DAT
3
- VERSION = "0.1.3"
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
6
6
 
data/lib/mascot/dat.rb CHANGED
@@ -6,6 +6,7 @@ require 'mascot/dat/parameters'
6
6
  require 'mascot/dat/peptides'
7
7
  require 'mascot/dat/proteins'
8
8
  require 'mascot/dat/psm'
9
+ require "mascot/dat/query"
9
10
  require 'mascot/dat/search_databases'
10
11
  require 'mascot/dat/summary'
11
12
  require 'mascot/dat/version'
@@ -48,41 +49,15 @@ module Mascot
48
49
  @dat_file.close
49
50
  end
50
51
 
51
- # Read in the query spectrum from the DAT file
52
+ # Return a specific query spectrum from the DAT file
52
53
  #
53
54
  # @param n The query spectrum numerical index
54
- # @return Hash of the spectrum. The hash has
55
+ # @return {Mascot::DAT::Query}
55
56
  def query(n)
56
- # search index for this
57
- bytepos = @idx["query#{n}".to_sym]
58
- @dat_file.pos = bytepos + @boundary_string.length
59
- att_rx = /(\w+)\=(.+)/
60
- q = {}
61
- @dat_file.each do |l|
62
- l.chomp
63
- case l
64
- when att_rx
65
- k,v = $1,$2
66
- case k
67
- when "title"
68
- q[k.to_sym] = URI.decode(v)
69
- when "Ions1"
70
- q[:peaks] = parse_mzi(v)
71
- else
72
- q[k.to_sym] = v
73
- end
74
- when @boundary
75
- break
76
- else
77
- next
78
- end
79
- end
80
- q
57
+ return Mascot::DAT::Query.new(self.read_section(:"query#{n}"))
81
58
  end
82
-
83
59
  alias_method :spectrum, :query
84
60
 
85
-
86
61
  # Go to a section of the Mascot DAT file
87
62
  def goto(key)
88
63
  if @idx.has_key?(key.to_sym)
@@ -189,15 +164,6 @@ module Mascot
189
164
  @dat_file.rewind
190
165
  end
191
166
 
192
- # Parse the ion string of mz/intensity peaks in Ions section
193
- # Peaks are not ordered, so we must account for that.
194
- def parse_mzi(ions_str)
195
- mzi_tmp = []
196
- ions_str.split(",").collect do |mzpair|
197
- mzi_tmp << mzpair.split(":").collect {|e| e.to_f}
198
- end
199
- # now sort the mz_tmp array as ascending m/z, and return the array
200
- mzi_tmp.sort
201
- end
167
+
202
168
  end
203
169
  end
@@ -0,0 +1,65 @@
1
+ require 'test_mascot-dat-helper'
2
+
3
# Checks the attributes that Mascot::DAT::Query exposes for query 23.
# NOTE(review): @dat is presumably opened on the example fixture by
# TestMascotDatHelper#setup - confirm against the helper.
class TestMascotDatQuery < TestMascotDatHelper
  # Marshal dump of the expected, sorted [m/z, intensity] pairs for query 23
  PEAKS_FIXTURE = "test/fixtures/query23_peaks.dmp"

  def setup
    super
    @query = @dat.query(23)
  end

  def test_name
    assert_equal("query23", @query.name)
  end

  def test_title
    assert_equal("281.832701459371_513", @query.title)
  end

  def test_rtinseconds
    assert_equal(513, @query.rtinseconds)
  end

  def test_index
    assert_equal(30, @query.index)
  end

  def test_charge
    assert_equal("3+", @query.charge)
  end

  def test_mass_min
    assert_equal(59.044502, @query.mass_min)
  end

  def test_mass_max
    assert_equal(730.399487, @query.mass_max)
  end

  def test_int_min
    assert_equal(1.951e+05, @query.int_min)
  end

  def test_int_max
    assert_equal(1.951e+05, @query.int_max)
  end

  def test_num_vals
    assert_equal(33, @query.num_vals)
  end

  def test_num_used1
    assert_equal(-1, @query.num_used1)
  end

  def test_peaks
    # Marshal data is binary: binread avoids text-mode newline translation
    # and encoding handling that a plain File.read may apply.
    expected_peaks = Marshal.load(File.binread(PEAKS_FIXTURE))
    assert_equal(expected_peaks, @query.peaks)
  end

  def test_mz_array
    mz_expected = [
      59.044502, 76.396653, 88.063115, 92.727062, 111.734216,
      114.091341, 122.082957, 138.586954, 160.757021, 167.097686, 171.105762,
      175.118953, 182.620797, 190.112916, 206.443325, 223.795476, 227.175405,
      240.631893, 244.138013, 256.155004, 276.166632, 284.665736, 309.16135,
      333.188096, 335.189576, 364.234317, 365.703382, 480.256511, 511.302732,
      568.324196, 617.315423, 669.371875, 730.399487
    ]
    assert_equal(mz_expected, @query.mz)
  end

  def test_intensity_array
    # all 33 peaks of query 23 are expected to carry the same intensity
    intensity_expected = Array.new(33, 195100.0)
    assert_equal(intensity_expected, @query.intensity)
  end
end
@@ -9,6 +9,10 @@ class TestMascotDat < TestMascotDatHelper
9
9
  assert_equal(Regexp.new("--gc0p4Jq0M2Yt08jU534c0p"), @dat.boundary)
10
10
  end
11
11
 
12
+ def test_dat_boundary_string
13
+ assert_equal("--gc0p4Jq0M2Yt08jU534c0p", @dat.boundary_string)
14
+ end
15
+
12
16
  def test_dat_byteoffset_index_is_created
13
17
  File.unlink(@dat.dat_file.path + ".idx") if File.exists?(@dat.dat_file.path + ".idx")
14
18
  @dat = Mascot::DAT.open("test/fixtures/example.dat")
@@ -42,10 +46,4 @@ class TestMascotDat < TestMascotDatHelper
42
46
  assert_equal(expected_section, @dat.read_section(:masses))
43
47
  end
44
48
 
45
- def test_peaks
46
- expected_peaks = Marshal.load(File.read("test/fixtures/query23_peaks.dmp"))
47
- query23 = @dat.query(23)
48
- assert_equal(expected_peaks,query23[:peaks])
49
- end
50
-
51
49
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mascot-dat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-03 00:00:00.000000000 Z
12
+ date: 2012-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &70330121470000 !ruby/object:Gem::Requirement
16
+ requirement: &70340237935320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70330121470000
24
+ version_requirements: *70340237935320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: yard
27
- requirement: &70330121469460 !ruby/object:Gem::Requirement
27
+ requirement: &70340237934880 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70330121469460
35
+ version_requirements: *70340237934880
36
36
  description: Mascot DAT file format parser
37
37
  email:
38
38
  - angel@upenn.edu
@@ -53,6 +53,7 @@ files:
53
53
  - lib/mascot/dat/peptides.rb
54
54
  - lib/mascot/dat/proteins.rb
55
55
  - lib/mascot/dat/psm.rb
56
+ - lib/mascot/dat/query.rb
56
57
  - lib/mascot/dat/search_databases.rb
57
58
  - lib/mascot/dat/summary.rb
58
59
  - lib/mascot/dat/version.rb
@@ -69,6 +70,7 @@ files:
69
70
  - test/test_mascot-dat-parameters.rb
70
71
  - test/test_mascot-dat-peptides.rb
71
72
  - test/test_mascot-dat-proteins.rb
73
+ - test/test_mascot-dat-query.rb
72
74
  - test/test_mascot-dat-search_databases.rb
73
75
  - test/test_mascot-dat-summary.rb
74
76
  - test/test_mascot-dat.rb
@@ -86,7 +88,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
86
88
  version: '0'
87
89
  segments:
88
90
  - 0
89
- hash: -998556290879411337
91
+ hash: -3741896018441832167
90
92
  required_rubygems_version: !ruby/object:Gem::Requirement
91
93
  none: false
92
94
  requirements:
@@ -95,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
97
  version: '0'
96
98
  segments:
97
99
  - 0
98
- hash: -998556290879411337
100
+ hash: -3741896018441832167
99
101
  requirements: []
100
102
  rubyforge_project:
101
103
  rubygems_version: 1.8.11
@@ -115,6 +117,7 @@ test_files:
115
117
  - test/test_mascot-dat-parameters.rb
116
118
  - test/test_mascot-dat-peptides.rb
117
119
  - test/test_mascot-dat-proteins.rb
120
+ - test/test_mascot-dat-query.rb
118
121
  - test/test_mascot-dat-search_databases.rb
119
122
  - test/test_mascot-dat-summary.rb
120
123
  - test/test_mascot-dat.rb