mspire-sequest 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
data/.autotest
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'spec/more'
|
5
|
+
require 'autotest/bacon'
|
6
|
+
#require 'redgreen/autotest'
|
7
|
+
|
8
|
+
# Overrides how autotest builds the shell command that runs the bacon specs.
class Autotest::Bacon < Autotest
  # Drop the inherited definition before supplying our own.
  undef make_test_cmd

  # Builds the command line used to run the given spec files.
  #
  # files_to_test - a Hash whose keys are the files to run.
  #
  # Returns the command String; bacon's '-a' flag is passed when no files
  # are given.
  def make_test_cmd(files_to_test)
    targets = files_to_test.keys.flatten.join(' ')
    targets = '-a' if targets.empty?
    # TODO : make regex to pass to -n using values
    "#{ruby} -S bacon -I#{libs} -o TestUnit #{targets}"
  end
end
|
17
|
+
|
18
|
+
|
19
|
+
#Autotest.add_hook :initialize do |at|
|
20
|
+
# at.clear_mappings
|
21
|
+
#end
|
22
|
+
|
23
|
+
#Autotest.add_hook :initialize do |at|
|
24
|
+
#at.add_mapping(%r%^lib/(.*)\.rb$%) { |_, m|
|
25
|
+
# #["spec/#{m[1]}_spec.rb"]
|
26
|
+
# #["test/#{m[1]}_test.rb"]
|
27
|
+
# ## for both specs and tests:
|
28
|
+
# ["spec/#{m[1]}_spec.rb"]
|
29
|
+
#}
|
30
|
+
#end
|
data/.gitmodules
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
[submodule "submodule/ms-testdata"]
|
2
|
+
path = submodule/ms-testdata
|
3
|
+
url = git://github.com/bahuvrihi/ms-testdata.git
|
4
|
+
[submodule "submodule/ms-in_silico"]
|
5
|
+
path = submodule/ms-in_silico
|
6
|
+
url = git://github.com/bahuvrihi/ms-in_silico.git
|
7
|
+
[submodule "submodule/tap-mechanize"]
|
8
|
+
path = submodule/tap-mechanize
|
9
|
+
url = git://github.com/bahuvrihi/tap-mechanize.git
|
data/History
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
== 0.2.0 / 2011-09-13
|
2
|
+
|
3
|
+
Breaking backwards compatibility. Based on new msplat gem instead of ms-core, ms-ident, etc. MS module instead of Ms module.
|
4
|
+
|
5
|
+
== 0.1.0 / 2011-04-11
|
6
|
+
|
7
|
+
* moved Arrayclass objects to Struct objects
|
8
|
+
|
9
|
+
== 0.0.15 / 2010-08-25
|
10
|
+
|
11
|
+
* Fixed another bug in the srf_to_sqt.rb commandline when called without an output file
|
12
|
+
|
13
|
+
== 0.0.14 / 2010-08-24
|
14
|
+
|
15
|
+
* Merged commandline programs into lib hierarchy for testing
|
16
|
+
* Wrote specs for commandline programs (shared spec with programmatic interface)
|
17
|
+
|
18
|
+
== 0.0.13 / 2010-08-16
|
19
|
+
|
20
|
+
* compatible with ruby 1.9
|
21
|
+
* simplified Rakefile, ditching efforts at gh-pages converter for now
|
22
|
+
* removed dependency on Tap
|
23
|
+
|
24
|
+
== 0.0.12 / 2010-01-01
|
25
|
+
|
26
|
+
* moved over to jeweler and tests to bacon (spec/more)
|
27
|
+
|
28
|
+
== 0.0.11 / 2010-01-01
|
29
|
+
|
30
|
+
* peptides have sf value (read from srf file)
|
31
|
+
|
32
|
+
== 0.0.10 / 2009-12-03
|
33
|
+
|
34
|
+
* turned off warning if print_duplicates == 0
|
35
|
+
|
36
|
+
== 0.0.9 / 2009-09-08
|
37
|
+
|
38
|
+
* added capability to read srf files created by reading in .out/.dta folders (combined).
|
39
|
+
NOTE: please consider this functionality beta stage as it has not been extensively tested!
|
40
|
+
* cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
|
41
|
+
|
42
|
+
== 0.0.8 / 2009-06-29
|
43
|
+
|
44
|
+
* bugfix - only applies to windows: fixes an error on windows opening srf files and searching for the internal sequest.params file. File.open(<srf>) -> File.open(<srf>, 'rb').
|
45
|
+
|
46
|
+
== 0.0.7 / 2009-06-26
|
47
|
+
|
48
|
+
* minor bug fix in srf to sqt output in compatibility with current ms-core ms/mass tables
|
49
|
+
|
50
|
+
== 0.0.6 / 2009-06-26
|
51
|
+
|
52
|
+
* fixed bug affecting only version 0.0.5 in srf reader affecting any file where print_duplicate_references was less than the number of protein references found for a peptide, but also > 0.
|
53
|
+
* so, the srf reader now robustly supports reading srf files regardless of print_duplicate_references setting.
|
54
|
+
|
55
|
+
== 0.0.5 / 2009-06-22
|
56
|
+
|
57
|
+
* fixed handling of files with print_duplicate_references = 0
|
58
|
+
* removes .hdr postfix on the fasta path for srf -> SQT output
|
59
|
+
|
60
|
+
== 0.0.4 / 2009-06-18
|
61
|
+
|
62
|
+
* srf_to_sqt.rb and srf_to_search.rb both working now
|
63
|
+
|
64
|
+
== 0.0.3 / 2009-06-16
|
65
|
+
|
66
|
+
* only dependent on very simple ms/fasta interface, no more on digest info, etc.
|
67
|
+
|
68
|
+
== 0.0.2 / 2009-05-14
|
69
|
+
|
70
|
+
* Basic SRF to SQT translation working
|
71
|
+
* SQT reading working
|
72
|
+
|
73
|
+
== 0.0.1 / 2009-05-11
|
74
|
+
|
75
|
+
* pulled out of mspire core
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright shared among contributing institutions:
|
2
|
+
Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
|
3
|
+
Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
|
4
|
+
Author: John T. Prince under direction of Edward Marcotte and Natalie Ahn
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
= ms-sequest
|
2
|
+
|
3
|
+
An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
|
4
|
+
|
5
|
+
== {Current API}[http://rubydoc.info/gems/ms-sequest]
|
6
|
+
|
7
|
+
== Examples
|
8
|
+
|
9
|
+
Provides two executables for extracting information from an Srf file (run without file for usage):
|
10
|
+
|
11
|
+
srf_to_sqt.rb file.srf # => file.sqt
|
12
|
+
srf_to_search.rb file.srf # => file.mgf (also can make .dta files)
|
13
|
+
|
14
|
+
=== MS::Sequest::Srf
|
15
|
+
|
16
|
+
Can read and convert Bioworks Sequest Results Files (SRF).
|
17
|
+
|
18
|
+
require 'ms/sequest/srf'
|
19
|
+
srf = MS::Sequest::Srf.new("file.srf")
|
20
|
+
|
21
|
+
Conversions (see api for options):
|
22
|
+
|
23
|
+
require 'ms/sequest/srf/sqt' # require this in addition to 'ms/sequest/srf'
|
24
|
+
srf.to_sqt # (outputs a file) -> file.sqt
|
25
|
+
|
26
|
+
require 'ms/sequest/srf/search' # require this in addition to 'ms/sequest/srf'
|
27
|
+
srf.to_mgf # (outputs a file) -> file.mgf
|
28
|
+
srf.to_dta # (outputs a dir) -> file
|
29
|
+
srf.to_dta("file.tgz", :tgz) # on the fly tgz (requires archive-tar-minitar)
|
30
|
+
|
31
|
+
Object access (see MS::Sequest::Srf for much more):
|
32
|
+
|
33
|
+
srf.header # MS::Sequest::Srf::Header object
|
34
|
+
srf.params # MS::Sequest::Params object
|
35
|
+
srf.dta_files # MS::Sequest::Srf::Dta objects
|
36
|
+
srf.out_files # MS::Sequest::Srf::Out objects
|
37
|
+
srf.peptide_hits # MS::Sequest::Srf::Out::Peptide objects
|
38
|
+
|
39
|
+
=== MS::Sequest::Params
|
40
|
+
|
41
|
+
Object or hash access to any parameter in the file. Also provides a unified interface across several versions (3.1 - 3.3)
|
42
|
+
|
43
|
+
require 'mspire/sequest/params'
|
44
|
+
params = Mspire::Sequest::Params.new("sequest.params")
|
45
|
+
params.any_existing_param # -> some value or empty string if no value
|
46
|
+
params['any_existing_param'] # -> some value or empty string if no value
|
47
|
+
params.non_existent_param # -> nil
|
48
|
+
|
49
|
+
# some unified interface methods:
|
50
|
+
params.enzyme # -> enzyme name with no parentheses
|
51
|
+
params.database # -> first_database_name
|
52
|
+
params.enzyme_specificity # -> [offset, cleave_at, except_if_after]
|
53
|
+
params.precursor_mass_type # => "average" | "monoisotopic"
|
54
|
+
params.fragment_mass_type # => "average" | "monoisotopic"
|
55
|
+
|
56
|
+
=== MS::Sequest::Sqt
|
57
|
+
|
58
|
+
sqt = MS::Sequest::Sqt.new("file.sqt")
|
59
|
+
sqt.header
|
60
|
+
sqt.spectra.each do |spectrum| # an MS::Sequest::Sqt::Spectrum object
|
61
|
+
spectrum.matches.each do |match| # an MS::Sequest::Sqt::Match object
|
62
|
+
match.loci.each do |locus| # an MS::Sequest::Sqt::Locus object
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# or more direct access to Match objects:
|
68
|
+
sqt.peptide_hits
|
69
|
+
|
70
|
+
Also reads Percolator SQT output files intelligently:
|
71
|
+
|
72
|
+
psqt = MS::Sequest::Sqt.new("percolator_output.sqt")
|
73
|
+
psqt.peptide_hits.each do |pmatch|
|
74
|
+
pmatch.percolator_score == pmatch.xcorr
|
75
|
+
pmatch.negative_q_value == pmatch.sp
|
76
|
+
pmatch.q_value == -pmatch.negative_q_value
|
77
|
+
end
|
78
|
+
|
79
|
+
== Installation
|
80
|
+
|
81
|
+
gem install mspire-sequest
|
82
|
+
|
83
|
+
== Copyright
|
84
|
+
|
85
|
+
See LICENSE (MIT)
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging and release tasks (jeweler).
require 'jeweler'
Jeweler::Tasks.new do |spec|
  spec.name = "mspire-sequest"
  spec.homepage = "http://github.com/princelab/mspire-sequest"
  spec.license = "MIT"
  spec.summary = %Q{An mspire library supporting SEQUEST, Bioworks, SQT, etc}
  spec.description = %Q{reads .SRF, .SQT and supports conversions}
  spec.email = "jtprince@gmail.com"
  spec.authors = ["John T. Prince"]
  spec.rubyforge_project = 'mspire'

  # runtime dependencies
  spec.add_runtime_dependency "mspire", "~> 0.7.3"
  spec.add_runtime_dependency "trollop", "~> 1.16"

  # development-only dependencies
  spec.add_development_dependency "jeweler", "~> 1.5.2"
  spec.add_development_dependency "bio", "~> 1.4.2"
  spec.add_development_dependency "ms-testdata", "= 0.2.1"
  spec.add_development_dependency "rspec", "~> 2.8.0"
end
Jeweler::RubygemsDotOrgTasks.new

# Spec task; also the default task (see below).
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new

#require 'rcov/rcovtask'
#Rcov::RcovTask.new do |spec|
#  spec.libs << 'spec'
#  spec.pattern = 'spec/**/*_spec.rb'
#  spec.verbose = true
#end

task :default => :spec

# API documentation built from the README and everything under lib/.
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # The title carries the contents of the VERSION file when it exists.
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "mspire-sequest #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.5
|
data/lib/mspire/sequest/params.rb
ADDED
@@ -0,0 +1,331 @@
|
|
1
|
+
require 'mspire/mass/aa'
|
2
|
+
|
3
|
+
# In the future, this guy should accept any version of bioworks params file
|
4
|
+
# and spit out any param queried.
|
5
|
+
|
6
|
+
module Mspire ; end
module Mspire::Sequest ; end

# Reader and simple parameter lookup for SEQUEST params files
# (Bioworks 3.1-3.3.1).
#
#     params = Mspire::Sequest::Params.new("sequest.params") # filename by default
#     params = Mspire::Sequest::Params.new.parse_io(some_io_object)
#
#     params.some_parameter         # => any parameter defined has a method
#     params.nonexistent_parameter  # => nil
#
# Provides consistent behavior between different versions for important info:
#
#     # some basic methods shared by all versions:
#     params.version              # => '3.1' | '3.2' | '3.3'
#     params.enzyme               # => enzyme name with no parentheses
#     params.min_number_termini
#     params.database             # => first_database_name
#     params.enzyme_specificity   # => [offset, cleave_at, except_if_after]
#     params.precursor_mass_type  # => "average" | "monoisotopic"
#     params.fragment_mass_type   # => "average" | "monoisotopic"
#
#     # some backwards/forwards compatibility methods:
#     params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
#     params.fragment_ion_tol            # => fragment_ion_tolerance
#
# In the future, this should accept any version of a Bioworks params file
# and return any param queried.
class Mspire::Sequest::Params

  # Bioworks 3.1 enzyme table, indexed by the 'enzyme_number' parameter.
  # Each row is [name, offset, cleave_at, except_if_after].
  Bioworks31_Enzyme_Info_Array = [
    ['No_Enzyme', 0, '-', '-'],                   # 0
    ['Trypsin', 1, 'KR', '-'],                    # 1
    ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],          # 2
    ['Chymotrypsin', 1, 'FWYL', '-'],             # 3
    ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],         # 4
    ['Clostripain', 1, 'R', '-'],                 # 5
    ['Cyanogen_Bromide', 1, 'M', '-'],            # 6
    ['IodosoBenzoate', 1, 'W', '-'],              # 7
    ['Proline_Endopept', 1, 'P', '-'],            # 8
    ['Staph_Protease', 1, 'E', '-'],              # 9
    ['Trypsin_K', 1, 'K', 'P'],                   # 10
    ['Trypsin_R', 1, 'R', 'P'],                   # 11
    ['GluC', 1, 'ED', '-'],                       # 12
    ['LysC', 1, 'K', '-'],                        # 13
    ['AspN', 0, 'D', '-'],                        # 14
    ['Elastase', 1, 'ALIV', 'P'],                 # 15
    ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'], # 16
  ]

  # separator between a parameter name and its value
  @@param_re = / = ?/o
  # everything after this character on a parameter line is a comment
  @@param_two_split = ';'
  # marks the start of the params section
  @@sequest_line = /\[SEQUEST\]/o

  # the general options
  attr_accessor :opts
  # the static weights added to amino acids
  attr_accessor :mods

  # All keys and values are stored as strings!
  #
  # file - optional path to a sequest.params file or an .srf file; when given
  #        it is parsed immediately.
  def initialize(file=nil)
    parse_file(file) if file
  end

  # Reads "name = value ; comment" lines from fh up to and including the
  # block of add_* (amino acid) lines, then repositions fh at the start of
  # the first line following that block.
  #
  # Lines without a " =" separator are skipped. Only the first separator
  # splits the line, so a value may itself contain " = ".
  #
  # Returns a Hash of name => value (both Strings, value right-stripped and
  # without the trailing comment).
  def grab_params(fh)
    hash = {}
    in_add_amino_acid_section = false
    add_section_re = /^\s*add_/
    prev_pos = nil
    while (line = fh.gets)
      is_add_line = !!(line =~ add_section_re)
      in_add_amino_acid_section ||= is_add_line
      if in_add_amino_acid_section && !is_add_line
        # we read one line past the add_* block: rewind to its start
        fh.pos = prev_pos
        break
      end
      prev_pos = fh.pos
      next unless line =~ /\w/
      key, rest = line.split(@@param_re, 2)
      next if rest.nil? # not a "name = value" line
      hash[key] = rest.split(@@param_two_split, 2).first.to_s.rstrip
    end
    hash
  end

  # Seeks to the [SEQUEST] section of fh and parses it into #opts and #mods.
  #
  # Returns self, or nil if no sequest params section is found in the io.
  def parse_io(fh)
    # this mimics ruby1.8 behavior as we read in the file
    fh.set_encoding('ASCII-8BIT') if fh.respond_to?(:set_encoding)

    # seek to the SEQUEST params
    loop do
      line = fh.gets
      # we return nil if we reach the end of the file without seeing sequest params
      return nil if line.nil?
      next unless line =~ @@sequest_line
      # double check that we are in a sequest params file:
      pos = fh.pos
      if fh.gets =~ /^first_database_name/
        fh.pos = pos
        break
      end
    end

    @opts = grab_params(fh)
    @opts["search_engine"] = "SEQUEST"

    # extract out the mods (collect the keys first so the hash is not
    # modified while it is being iterated)
    @mods = {}
    @opts.keys.grep(/^add_/).each { |key| @mods[key] = @opts.delete(key) }

    ## this gets rid of the .hdr postfix on indexed databases
    if (db = @opts["first_database_name"])
      @opts["first_database_name"] = db.sub(/\.hdr$/, '')
    end
    self
  end

  ## Parses file (a sequest.params file or an .srf file) and drops the .hdr
  ## behind indexed fasta files.
  ##
  ## The file is opened in binary mode: .srf files are binary, and text mode
  ## on Windows breaks seeking and stops reading at a ^Z byte.
  ##
  ## Returns self.
  def parse_file(file)
    File.open(file, 'rb') { |fh| parse_io(fh) }
    self
  end

  # returns( offset, cleave_at, except_if_after )
  # offset is an Integer specifying how far after an amino acid to cut
  # cleave_at is a string of all amino acids that should be cut at
  # except_if_after for not cutting after those
  # normal tryptic behavior would be: [1, 'KR', 'P']
  # NOTE: a '-' in a params file is returned as an '' (empty string)
  # AspN is [0,'D','']
  #
  # Raises ArgumentError if the params version is not recognized.
  def enzyme_specificity
    ver = version
    enzyme_ar =
      if ver == '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
      elsif ver && ver >= '3.2'
        arr = enzyme_info.split(/\s+/)[2,3]
        arr[0] = arr[0].to_i
        arr
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    enzyme_ar.map { |str| str == '-' ? '' : str }
  end

  # Returns the version of the sequest.params file
  # Returns String "3.3" if contains "fragment_ion_units"
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil if none of those parameters is present
  def version
    if @opts['fragment_ion_units'] then '3.3'
    elsif @opts['enzyme_info'] then '3.2'
    elsif @opts['enzyme_number'] then '3.1'
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # Returns the 'partial_sequence' parameter, or "0" when it is missing or
  # empty.
  # I'm not sure if this is the right mapping for sequence_search_constraint?
  def sequence
    pseq = @opts['partial_sequence']
    (!pseq || pseq == "") ? "0" : pseq
  end

  # Returns "average" or "monoisotopic" based on 'mass_type_parent'.
  # Aborts the process on any other value.
  def precursor_mass_type
    case @opts['mass_type_parent']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_parent in sequest!"
    end
  end

  # Returns "average" or "monoisotopic" based on 'mass_type_fragment'.
  # Aborts the process on any other value.
  def fragment_mass_type
    case @opts['mass_type_fragment']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_fragment in sequest!"
    end
  end

  # Any parameter read from the file is available as a method of the same
  # name (general options are consulted before the add_* mods). Unknown
  # names, and any name on an object that has not parsed anything yet,
  # return nil.
  def method_missing(name, *args)
    string = name.to_s
    if @opts && @opts.key?(string) then @opts[string]
    elsif @mods && @mods.key?(string) then @mods[string]
    end
  end

  # Keeps respond_to? truthful for the parameter names that method_missing
  # resolves to a stored value.
  def respond_to_missing?(name, include_private = false)
    string = name.to_s
    (@opts && @opts.key?(string)) || (@mods && @mods.key?(string)) || super
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/).last
  end

  # changes the path of the database (the basename is kept)
  def database_path=(newpath)
    db = @opts["first_database_name"]
    @opts["first_database_name"] = File.join(newpath, _sys_ind_basename(db))
  end

  # Returns the 'first_database_name' parameter.
  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table from Mspire::Mass::AA
  # based_on may be :precursor or :fragment
  # (returns nil for any other value of based_on)
  def mass_index(based_on=:precursor)
    mass_type =
      case based_on
      when :precursor then precursor_mass_type
      when :fragment then fragment_mass_type
      end
    case mass_type
    when 'average' then Mspire::Mass::AA::AVG
    when 'monoisotopic' then Mspire::Mass::AA::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # Anything else (or no enzyme_info at all) warns and returns "1".
  def min_number_termini
    if (e_info = @opts["enzyme_info"])
      case e_info.split(" ")[1]
      when "1" then return "2"
      when "2" then return "1"
      end
    end
    warn "No Enzyme termini info, using min_number_termini = '1'"
    "1"
  end

  # returns the enzyme name (but no parentheses connected with the name).
  # this will likely be capitalized.
  # the regular expression splits the name and returns the first part (or just
  # the name if not found)
  def enzyme(split_on=/[_\(]/)
    basic_name =
      if version == '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][0]
      else # v >= '3.2' applies to all later versions??
        @opts["enzyme_info"]
      end
    name_plus_parenthesis = basic_name.split(' ', 2).first
    name_plus_parenthesis.split(split_on, 2).first
  end

  # Returns the 'max_num_internal_cleavage_sites' parameter.
  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # my take on peptide_mass_units:
  # (see http://www.ionsource.com/tutorial/isotopes/slide2.htm)
  # amu = atomic mass units = (mass_real - mass_measured).abs (??abs??)
  # mmu = milli mass units (amu / 1000)
  # ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured  [ where ∆m_accuracy = mass_real – mass_measured ]

  # Returns the 'peptide_mass_tolerance' parameter unchanged; prints a
  # warning when 'peptide_mass_units' is anything other than "0".
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      puts "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  # Returns the 'fragment_ion_tolerance' parameter.
  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  # Returns 'max_num_differential_AA_per_mod', falling back to
  # 'max_num_differential_per_peptide'.
  def max_num_differential_AA_per_mod
    @opts["max_num_differential_AA_per_mod"] || @opts["max_num_differential_per_peptide"]
  end

  # returns a hash by add_<whatever> of any static mods != 0
  # the values are still as strings
  def static_mods
    @mods.reject { |_key, value| value.to_f == 0.0 }
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

end
|
331
|
+
|