mspire-mascot-dat 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2013 Brigham Young University
2
+ Authored by: John T. Prince
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # mspire-mascot-dat
2
+
3
+ Reads the mascot search engine .dat results file.
4
+
5
+ ## Examples
6
+
7
+ ```ruby
8
+ require 'mspire/mascot/dat'
9
+
10
+ Mspire::Mascot::Dat.open("file.dat") do |dat|
11
+ dat.sections # => [:parameters, :masses, :unimod, :enzyme ...]
12
+ end
13
+
14
+ ```
15
+ ### each peptide
16
+
17
+ #### every peptide hit
18
+
19
+ ```ruby
20
+ dat.each_peptide do |pephit|
21
+ pephit.missed_cleavages # => an Integer
22
+ pephit.ions_score # => a Float
23
+ ...
24
+ end
25
+ ```
26
+
27
+ #### every decoy peptide hit
28
+
29
+ ```ruby
30
+ dat.each_peptide(false) {|pephit| ... }
31
+ ```
32
+
33
+ #### each top peptide hit
34
+
35
+ ```ruby
36
+ dat.each_peptide(true, 1) {|pephit| ... }
37
+ ```
38
+
39
+ ## Further Info
40
+
41
+ See Mascot's "Installation & Setup Manual" for detailed information about the
42
+ .dat format itself.
43
+
44
+ ## Copyright
45
+
46
+ MIT. See LICENSE.txt
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+
6
+ require 'jeweler'
7
+ Jeweler::Tasks.new do |gem|
8
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
9
+ gem.name = "mspire-mascot-dat"
10
+ gem.homepage = "http://github.com/princelab/mspire-mascot-dat"
11
+ gem.license = "MIT"
12
+ gem.summary = %Q{Reads mascot dat files for mspire library.}
13
+ gem.description = %Q{Reads mascot dat files with gusto for mspire library.}
14
+ gem.email = "jtprince@gmail.com"
15
+ gem.authors = ["John T. Prince"]
16
+ gem.add_dependency "elif", "~> 0.1.0"
17
+ gem.add_development_dependency "rspec", "~> 2.8.0"
18
+ gem.add_development_dependency "rdoc", "~> 3.12"
19
+ gem.add_development_dependency "jeweler", "~> 1.8.4"
20
+ end
21
+ Jeweler::RubygemsDotOrgTasks.new
22
+
23
+ require 'rspec/core'
24
+ require 'rspec/core/rake_task'
25
+ RSpec::Core::RakeTask.new(:spec) do |spec|
26
+ spec.pattern = FileList['spec/**/*_spec.rb']
27
+ end
28
+
29
+ task :default => :spec
30
+
31
+ require 'rdoc/task'
32
+ Rake::RDocTask.new do |rdoc|
33
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
34
+
35
+ rdoc.rdoc_dir = 'rdoc'
36
+ rdoc.title = "mspire-mascot-dat #{version}"
37
+ rdoc.rdoc_files.include('README*')
38
+ rdoc.rdoc_files.include('lib/**/*.rb')
39
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,93 @@
1
+ require 'elif'
2
+
module Mspire
  module Mascot
    class Dat
      # Byte index (not line index) of a Mascot .dat file.
      #
      # Every section of the file is introduced by a MIME part header of the
      # form
      #
      #     Content-Type: application/x-Mascot; name="<section>"
      #
      # followed by one blank line. The index records, for each section, the
      # byte offset of the first line after that blank line.
      class Index
        # Matches a section header line and captures the section name.
        SECTION_HEADER = %r{^Content-Type: application/x-Mascot; name=["'](\w+)["']}

        # Matches a query section name (e.g. "query12") and captures its number.
        QUERY_NAME = /query(\d+)/

        # the hash holding the start byte for each section (besides the
        # queries), keyed by the section name as a String
        attr_accessor :byte_num

        # the array holding the start byte for each query. It is indexed by
        # query number, so slots with no corresponding query (such as index
        # 0 when numbering starts at 1) are nil.
        attr_accessor :query_num_to_byte

        # an array of the query nums, in the order they appear in the file
        attr_accessor :query_nums

        # Builds an empty index, or indexes +io+ right away when it is given.
        def initialize(io=nil)
          @byte_num = {}
          @query_num_to_byte = []
          @query_nums = []
          from_io(io) if io
        end

        # true if at least one query section was indexed
        def has_queries?
          !@query_nums.empty?
        end

        # Scans +io+ from the beginning and records the start byte of every
        # section. The io is rewound before and after the scan and the
        # resulting collections are frozen. Returns self.
        #
        # The collections are rebuilt from scratch on every call, so an
        # existing (frozen) index can be re-populated from another io.
        def from_io(io)
          byte_num = {}
          query_num_to_byte = []
          query_nums = []

          io.rewind
          # A single read loop: every line, including the very first one, is
          # checked for a section header.
          while (line = io.gets)
            md = SECTION_HEADER.match(line)
            next unless md

            io.gets # consume the blank line that follows the header
            pos = io.pos
            name = md[1]

            if (qmd = QUERY_NAME.match(name))
              query_num = qmd[1].to_i
              query_nums << query_num
              query_num_to_byte[query_num] = pos
            else
              byte_num[name] = pos
            end
          end
          io.rewind

          @query_nums = query_nums.freeze
          @query_num_to_byte = query_num_to_byte.freeze
          @byte_num = byte_num.freeze
          self
        end

        # given a string or symbol, looks up the start byte of that section.
        # Given an Integer, assumes it is a query num and returns the start
        # byte of the query number. Returns nil when nothing is indexed under
        # the key.
        def [](key)
          if key.is_a?(Integer)
            @query_num_to_byte[key]
          else
            @byte_num[key.to_s]
          end
        end

        # start byte of query +n+; nil if the query is out of bounds
        def query(n)
          @query_num_to_byte[n]
        end

      end
    end
  end
end
79
+
80
+ #--gc0p4Jq0M2Yt08jU534c0p
81
+ #Content-Type: application/x-Mascot; name="index"
82
+
83
+ #parameters=4
84
+ #masses=78
85
+ #unimod=119
86
+ #enzyme=484
87
+ #header=492
88
+ #summary=507
89
+ #peptides=820
90
+ #proteins=853
91
+ #query1=858
92
+ #query2=871
93
+ #--gc0p4Jq0M2Yt08jU534c0p--
@@ -0,0 +1,95 @@
1
+
module Mspire
  module Mascot
    class Dat
      # A single peptide hit read from a "peptides" style section.
      #
      # mr = relative molecular mass. The first eleven members mirror, in
      # order, the comma separated fields of a "qN_pM=" line; query_num and
      # peptide_num come from the line's key. proteins and data are reserved
      # and are currently always nil (reading them is not implemented).
      Peptide = Struct.new(:missed_cleavages, :mr, :delta, :num_ions_matched, :seq, :peaks_from_ions_1, :var_mods_string, :ions_score, :ion_series_found, :peaks_from_ions_2, :peaks_from_ions_3, :query_num, :peptide_num, :proteins, :data) do
        # How to cast each of the eleven comma separated fields, in order.
        # NOTE: a constant assigned inside a block is defined in the enclosing
        # lexical scope, so this is Mspire::Mascot::Dat::CAST.
        CAST = [:int, :float, :float, :int, :string, :int, :string, :float, :string, :int, :int].freeze

        # Splits a line key into its numeric parts and any trailing label:
        # h49_q2 => [49, 2, nil]; q2_p4_primary_nl => [2, 4, 'primary_nl']
        def self.qnum_pnum(string)
          first, second, other = string.split('_', 3)
          [first[1..-1].to_i, second[1..-1].to_i, other]
        end

        # takes an io object (positioned at the beginning of a peptide hit)
        # and reads off the next peptide hit "q1_p1=0,798.23...". Returns nil
        # if it reaches the end of the section, the end of the io, or a blank
        # line.
        #
        # Any subsidiary lines belonging to the same hit (q1_p1_terms=...,
        # q1_p1_primary_nl=..., etc.) are skipped, leaving the io positioned
        # at the start of the next hit (or at the terminating line).
        #
        # Records with fewer than eleven fields (e.g. the no-match record
        # "q1_p1=-1") leave the missing members nil; query_num and
        # peptide_num are always filled in.
        #
        # Raises NotImplementedError if proteins or data is requested; the
        # check happens before anything is read so the io is left untouched.
        def self.from_io(io, proteins=false, data=false)
          raise NotImplementedError, "not reading proteins or data yet" if proteins || data

          line = io.gets
          return nil if section_finished?(line)

          # key, the comma separated core fields, then (unused) protein info
          qp, core, _protein_info = line.chomp.split(/[=;]/, 3)
          qnum, pnum, = qnum_pnum(qp)
          fields = core.to_s.split(',')
          # Walk CAST rather than the fields so that short records are padded
          # with nil and query/peptide numbers always land in their own slots.
          vals = CAST.each_with_index.map { |cast, i| cast_field(fields[i], cast) }
          pephit = new(*vals, qnum, pnum)

          loop do
            before = io.pos
            line = io.gets
            if section_finished?(line)
              io.pos = before
              break
            end
            next_qnum, next_pnum, = qnum_pnum(line.split('=', 2).first)
            # a different query/peptide number marks the start of the next hit
            unless pephit.peptide_num == next_pnum && pephit.query_num == next_qnum
              io.pos = before
              break
            end
          end
          pephit
        end

        # true when +line+ is absent (end of io), blank, or a "--" boundary
        def self.section_finished?(line)
          line.nil? || line.size < 2 || line.strip.empty? || line.start_with?('--')
        end

        # casts one raw field according to its CAST entry; nil stays nil
        def self.cast_field(val, cast)
          return nil if val.nil?

          case cast
          when :int then val.to_i
          when :float then val.to_f
          else val
          end
        end

        private_class_method :section_finished?, :cast_field
      end
    end
  end
end
+
57
+ =begin
58
+ q1_p1=missed cleavages, (–1 indicates no match)
59
+ peptide Mr,
60
+ delta,
61
+ number of ions matched,
62
+ peptide string,
63
+ peaks used from Ions1,
64
+ variable modifications string,
65
+ ions score,
66
+ ion series found,
67
+ peaks used from Ions2,
68
+ peaks used from Ions3;
69
+ “accession string”: data for first protein
70
+ frame number:
71
+ start:
72
+ end:
73
+ multiplicity,
74
+ “accession string”: data for second protein
75
+ frame number:
76
+ start:
77
+ end:
78
+ multiplicity,
79
+ etc.
80
+ q1_p1_et_mods=modification mass,
81
+ neutral loss mass,
82
+ modification description
83
+ q1_p1_et_mods_master=neutral loss mass[[,neutral loss mass] ... ]
84
+ q1_p1_et_mods_slave=neutral loss mass[[,neutral loss mass] ... ]
85
+ q1_p1_primary_nl=neutral loss string
86
+ q1_p1_na_diff=original NA sequence,
87
+ modified NA sequence
88
+ q1_p1_tag=tagNum:startPos:endPos:seriesID,...
89
+ q1_p1_drange=startPos:endPos
90
+ q1_p1_terms=residue,residue[[:residue,residue] ... ]
91
+ q1_p1_subst=pos1,ambig1,matched1 ... ,posn,ambign,matchedn
92
+ q1_p1_comp=quantitation component name
93
+ q1_p2=...
94
+ =end
95
+
@@ -0,0 +1,69 @@
1
+ require 'ostruct'
2
+ require 'delegate'
3
+ require 'cgi'
4
+
module Mspire
  module Mascot
    class Dat
      # The key/value pairs of one "queryN" section. It is a Hash keyed by
      # Symbol; keys that are present can also be read and written with
      # method syntax (query.charge, query.charge = 2).
      class Query < Hash

        # Conversions applied to the raw string value of known keys. An entry
        # is either a lambda or the name of a String method.
        CAST = {
          # "2+" => 2, "2-" => -2 (the trailing sign is moved to the front)
          charge: ->(st) { "#{st[-1]}#{st[0...-1]}".to_i },
          # titles are stored URL-escaped
          title: ->(st) { CGI.unescape(st) },
          mass_min: :to_f,
          mass_max: :to_f,
          int_min: :to_f,
          int_max: :to_f,
          num_vals: :to_i,
          num_used1: :to_i,
          index: :to_i,
          # "m1:i1,m2:i2" => [[m1, i1], [m2, i2]] as Floats
          Ions1: ->(st) { st.split(',').map {|pair_s| pair_s.split(':').map(&:to_f) } },
        }.freeze

        # Reads "key=value" lines from io (positioned at the first line of a
        # query section) until a line starting with "--" or the end of the io,
        # and returns a new Query.
        #
        # Only the first "=" separates key from value, so values may contain
        # "=". Blank lines are skipped. A line with no "=" is stored with a
        # nil value and is not cast.
        def self.from_io(io)
          query = new
          while (line = io.gets)
            break if line.start_with?('--')

            line = line.chomp
            next if line.empty?

            name, val = line.split('=', 2)
            key = name.to_sym
            cast = CAST[key]
            query[key] = (cast && !val.nil?) ? cast.to_proc.call(val) : val
          end
          query
        end

        # Gives method-style access to existing keys: `query.foo` reads
        # self[:foo] and `query.foo = x` writes it. Anything that is not an
        # existing key is passed on to super (raising NoMethodError).
        def method_missing(name, *args, &block)
          meth = name.to_s
          if meth.end_with?('=')
            key = meth.chomp('=').to_sym
            if key?(key)
              self[key] = args.first
            else
              super
            end
          elsif key?(name)
            self[name]
          else
            super
          end
        end

        # keeps respond_to? consistent with method_missing
        def respond_to_missing?(name, include_private = false)
          key?(name.to_s.chomp('=').to_sym) || super
        end
      end
    end
  end
end
60
+
61
+ #index=1
62
+ #charge=2+
63
+ #mass_min=305.484440
64
+ #mass_max=1998.945430
65
+ #int_min=1.5
66
+ #int_max=747
67
+ #num_vals=3
68
+ #num_used1=-1
69
+
@@ -0,0 +1,59 @@
1
+ require 'mspire/mascot/dat/index'
2
+ require 'mspire/mascot/dat/peptide'
3
+ require 'mspire/mascot/dat/query'
4
+
5
+ module Mspire
6
+ module Mascot
7
+ class Dat
8
+ # the io object which is the open dat file
9
+ attr_accessor :io
10
+
11
+ # the index object which points to the start byte for each section
12
+ attr_accessor :index
13
+
14
+ def initialize(io)
15
+ @io = io
16
+ @index = Index.new(@io)
17
+ end
18
+
19
+ def self.open(file, &block)
20
+ io = File.open(file)
21
+ block.call(self.new(io))
22
+ io.close
23
+ end
24
+
25
+ # positions io at the beginning of the section data (past the Content
26
+ # type and blank line). If given an integer, interprets it as a query
27
+ # number. returns self
28
+ def start_section!(name)
29
+ @io.pos = @index[name]
30
+ self
31
+ end
32
+
33
+ def query(n)
34
+ start_section!(n)
35
+ Query.from_io(@io)
36
+ end
37
+
38
+ def each_peptide(non_decoy=true, top_n=Float::INFINITY, &block)
39
+ block or return enum_for(__method__, non_decoy, top_n)
40
+ start_section!(non_decoy ? :peptides : :decoy_peptides)
41
+ while peptide = Peptide.from_io(@io)
42
+ block.call(peptide) if peptide.peptide_num <= top_n
43
+ end
44
+ end
45
+
46
+ # returns a list of all sections as symbols. The symbol :queries is
47
+ # returned rather than each query individually if their is 1 or more
48
+ # queries.
49
+ def sections
50
+ reply = @index.byte_num.keys
51
+ if @index.has_queries?
52
+ reply.push('queries')
53
+ end
54
+ reply.map(&:to_sym)
55
+ end
56
+
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,70 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "mspire-mascot-dat"
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["John T. Prince"]
12
+ s.date = "2013-03-28"
13
+ s.description = "Reads mascot dat files with gusto for mspire library."
14
+ s.email = "jtprince@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.md"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "LICENSE.txt",
23
+ "README.md",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/mspire/mascot/dat.rb",
27
+ "lib/mspire/mascot/dat/index.rb",
28
+ "lib/mspire/mascot/dat/peptide.rb",
29
+ "lib/mspire/mascot/dat/query.rb",
30
+ "mspire-mascot-dat.gemspec",
31
+ "spec/mspire/mascot/dat/index_spec.rb",
32
+ "spec/mspire/mascot/dat/peptide_spec.rb",
33
+ "spec/mspire/mascot/dat/query_spec.rb",
34
+ "spec/mspire/mascot/dat_spec.rb",
35
+ "spec/reference/dat_format_reference.md",
36
+ "spec/reference/two_spectra_decoy_F004129.png",
37
+ "spec/reference/two_spectra_no_decoy_F004128.png",
38
+ "spec/spec_helper.rb",
39
+ "spec/testfiles/F004128.dat",
40
+ "spec/testfiles/F004129.dat",
41
+ "spec/testfiles/two_spectra.mgf"
42
+ ]
43
+ s.homepage = "http://github.com/princelab/mspire-mascot-dat"
44
+ s.licenses = ["MIT"]
45
+ s.require_paths = ["lib"]
46
+ s.rubygems_version = "1.8.23"
47
+ s.summary = "Reads mascot dat files for mspire library."
48
+
49
+ if s.respond_to? :specification_version then
50
+ s.specification_version = 3
51
+
52
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
+ s.add_runtime_dependency(%q<elif>, ["~> 0.1.0"])
54
+ s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
55
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
56
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
57
+ else
58
+ s.add_dependency(%q<elif>, ["~> 0.1.0"])
59
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
60
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
62
+ end
63
+ else
64
+ s.add_dependency(%q<elif>, ["~> 0.1.0"])
65
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
66
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
67
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
68
+ end
69
+ end
70
+
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/mascot/dat/index'
4
+
5
+ describe 'Mspire::Mascot::Dat::Index being initialized from file' do
6
+
7
+ let(:io) { File.open(TESTFILES + "/F004128.dat") }
8
+
9
+ specify '#initialize(io) creates the index object' do
10
+ Mspire::Mascot::Dat::Index.new(io).should be_a(Mspire::Mascot::Dat::Index)
11
+ end
12
+
13
+ describe Mspire::Mascot::Dat::Index do
14
+ subject { Mspire::Mascot::Dat::Index.new(io) }
15
+
16
+ it 'can access the header start byte nums' do
17
+
18
+ {
19
+ :parameters => 196,
20
+ :masses => 1203,
21
+ :unimod => 1873,
22
+ :enzyme => 20540,
23
+ :header => 20661,
24
+ :summary => 21103,
25
+ :peptides => 41624,
26
+ :proteins => 43894,
27
+ }.each {|head,val| subject[head].should == val }
28
+
29
+ end
30
+
31
+ it 'can get the queries' do
32
+ [1,2].zip([44129, 51186]) do |num, line_num|
33
+ subject.query(num).should == line_num
34
+ end
35
+ end
36
+
37
+ it 'can be accessed with brackets like array or hash' do
38
+ subject[1].should == 44129
39
+ subject[:peptides].should == 41624
40
+ subject['peptides'].should == 41624
41
+ end
42
+
43
+ end
44
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/mascot/dat/peptide'
4
+
5
+ describe 'reading off a Peptide' do
6
+
7
+ before(:all) do
8
+ info = <<STRING
9
+ q2_p1=0,2978.269196,1.195840,5,MDSSSGSQGNGSFMDQNSLGILNMDNLK,17,000000000000001000000000100000,4.11,0002000020000000000,0,0;"Q9VV79":0:1:28:1
10
+ q2_p1_terms=-,V
11
+ q2_p1_primary_nl=000000000000002000000000200000
12
+ q2_p2=1,2979.449478,0.015558,5,STGAESSEEXLREAYIMASVEHVNLLK,45,00000000000000000100000000000,2.84,0000000020000000000,0,0;"Q6SAG3":0:875:901:1
13
+ STRING
14
+ @io = StringIO.new(info)
15
+ end
16
+
17
+ it 'works' do
18
+ peptide = Mspire::Mascot::Dat::Peptide.from_io(@io)
19
+ { missed_cleavages: 0, mr: 2978.269196, delta: 1.19584, num_ions_matched: 5, seq: 'MDSSSGSQGNGSFMDQNSLGILNMDNLK', peaks_from_ions_1: 17, var_mods_string: '000000000000001000000000100000', ions_score: 4.11, ion_series_found: '0002000020000000000', peaks_from_ions_2: 0, peaks_from_ions_3: 0, query_num: 2, peptide_num: 1, proteins: nil, data: nil }.each do |k,v|
20
+ peptide.send(k).should == v
21
+ end
22
+ end
23
+
24
+ end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
+ require 'mspire/mascot/dat/query'
4
+
5
+ describe 'creating a query object' do
6
+
7
+ before(:all) do
8
+ # this is hacked up to be much smaller, so don't look for consistency
9
+ # between the data and the spectrum
10
+ data = <<END
11
+ title=1%2e2746%2e2746%2e2
12
+ index=1
13
+ charge=2-
14
+ mass_min=305.484440
15
+ mass_max=1998.945430
16
+ int_min=1.5
17
+ int_max=747
18
+ num_vals=3
19
+ num_used1=-1
20
+ Ions1=371.460240:10.3,486.498990:15.7,538.381160:24.9
21
+ --gc0p4Jq0M2Yt08jU534c0p
22
+ Content-Type: application/x-Mascot; name="query2"
23
+ END
24
+ @io = StringIO.new(data)
25
+ end
26
+
27
+ it 'parses' do
28
+ query = Mspire::Mascot::Dat::Query.from_io(@io)
29
+ query.title.should == '1.2746.2746.2'
30
+ query.charge.should == -2
31
+ end
32
+ end
33
+