mspire-sequest 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
data/.autotest ADDED
@@ -0,0 +1,30 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'spec/more'
5
+ require 'autotest/bacon'
6
+ #require 'redgreen/autotest'
7
+
8
# Autotest runner customization: swaps in a bacon-based test command.
class Autotest::Bacon < Autotest
  # drop the existing definition so the one below takes its place
  undef make_test_cmd

  # Builds the command line used to run the given files through bacon.
  # files_to_test is a hash; only its keys (the file names) are used.
  # Falls back to the '-a' argument when there are no files to list.
  def make_test_cmd(files_to_test)
    targets = files_to_test.keys.flatten.join(' ')
    targets = '-a' if targets.empty?
    # TODO : make regex to pass to -n using values
    "#{ruby} -S bacon -I#{libs} -o TestUnit #{targets}"
  end
end
17
+
18
+
19
+ #Autotest.add_hook :initialize do |at|
20
+ # at.clear_mappings
21
+ #end
22
+
23
+ #Autotest.add_hook :initialize do |at|
24
+ #at.add_mapping(%r%^lib/(.*)\.rb$%) { |_, m|
25
+ # #["spec/#{m[1]}_spec.rb"]
26
+ # #["test/#{m[1]}_test.rb"]
27
+ # ## for both specs and tests:
28
+ # ["spec/#{m[1]}_spec.rb"]
29
+ #}
30
+ #end
data/.gitmodules ADDED
@@ -0,0 +1,9 @@
1
+ [submodule "submodule/ms-testdata"]
2
+ path = submodule/ms-testdata
3
+ url = git://github.com/bahuvrihi/ms-testdata.git
4
+ [submodule "submodule/ms-in_silico"]
5
+ path = submodule/ms-in_silico
6
+ url = git://github.com/bahuvrihi/ms-in_silico.git
7
+ [submodule "submodule/tap-mechanize"]
8
+ path = submodule/tap-mechanize
9
+ url = git://github.com/bahuvrihi/tap-mechanize.git
data/History ADDED
@@ -0,0 +1,79 @@
1
+ == 0.2.0 / 2011-09-13
2
+
3
+ Breaking backwards compatibility. Based on new msplat gem instead of ms-core, ms-ident, etc. MS module instead of Ms module.
4
+
5
+ == 0.1.0 / 2011-04-11
6
+
7
+ * moved Arrayclass objects to Struct objects
8
+
9
+ == 0.0.15 / 2010-08-25
10
+
11
+ * Fixed another bug in the srf_to_sqt.rb commandline when called without an output file
12
+
13
+ == 0.0.14 / 2010-08-24
14
+
15
+ * Merged commandline programs into lib hierarchy for testing
16
+ * Wrote specs for commandline programs (shared spec with programmatic interface)
17
+
18
+ == 0.0.13 / 2010-08-16
19
+
20
+ * compatible with ruby 1.9
21
+ * simplified Rakefile, ditching efforts at gh-pages converter for now
22
+ * removed dependency on Tap
23
+
24
+ == 0.0.12 / 2010-01-01
25
+
26
+ * moved over to jeweler and tests to bacon (spec/more)
27
+
28
+ == 0.0.11 / 2010-01-01
29
+
30
+ * peptides have sf value (read from srf file)
31
+
32
+ == 0.0.10 / 2009-12-03
33
+
34
+ * turned off warning if print_duplicates == 0
35
+
36
+ == 0.0.9 / 2009-09-08
37
+
38
+ * added capability to read srf files created by reading in .out/.dta folders (combined).
39
+ NOTE: please consider this functionality beta stage as it has not been extensively tested!
40
+ * cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
41
+
42
+ == 0.0.8 / 2009-06-29
43
+
44
+ * bugfix - only applies to windows: fixes an error on windows opening srf files and searching for the internal sequest.params file. File.open(<srf>) -> File.open(<srf>, 'rb').
45
+
46
+ == 0.0.7 / 2009-06-26
47
+
48
+ * minor bug fix in srf to sqt output in compatibility with current ms-core ms/mass tables
49
+
50
+ == 0.0.6 / 2009-06-26
51
+
52
+ * fixed bug affecting only version 0.0.5 in srf reader affecting any file where print_duplicate_references was less than the number of protein references found for a peptide, but also > 0.
53
+ * so, the srf reader now robustly supports reading srf files regardless of print_duplicate_references setting.
54
+
55
+ == 0.0.5 / 2009-06-22
56
+
57
+ * fixed handling of files with print_duplicate_references = 0
58
+ * removes .hdr postfix on the fasta path for srf -> SQT output
59
+
60
+ == 0.0.4 / 2009-06-18
61
+
62
+ * srf_to_sqt.rb and srf_to_search.rb both working now
63
+
64
+ == 0.0.3 / 2009-06-16
65
+
66
+ * only dependent on very simple ms/fasta interface, no more on digest info, etc.
67
+
68
+ == 0.0.2 / 2009-05-14
69
+
70
+ * Basic SRF to SQT translation working
71
+ * SQT reading working
72
+
73
+ == 0.0.1 / 2009-05-11
74
+
75
+ * pulled out of mspire core
76
+
77
+
78
+
79
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright shared among contributing institutions:
2
+ Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
+ Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
4
+ Author: John T. Prince under direction of Edward Marcotte and Natalie Ahn
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,85 @@
1
+ = mspire-sequest
2
+
3
+ An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
4
+
5
+ == {Current API}[http://rubydoc.info/gems/ms-sequest]
6
+
7
+ == Examples
8
+
9
+ Provides two executables for extracting information from an Srf file (run without file for usage):
10
+
11
+ srf_to_sqt.rb file.srf # => file.sqt
12
+ srf_to_search.rb file.srf # => file.mgf (also can make .dta files)
13
+
14
+ === MS::Sequest::Srf
15
+
16
+ Can read and convert Bioworks Sequest Results Files (SRF).
17
+
18
+ require 'mspire/sequest/srf'
19
+ srf = Mspire::Sequest::Srf.new("file.srf")
20
+
21
+ Conversions (see api for options):
22
+
23
+ require 'ms/sequest/srf/sqt' # require this in addition to 'ms/sequest/srf'
24
+ srf.to_sqt # (outputs a file) -> file.sqt
25
+
26
+ require 'ms/sequest/srf/search' # require this in addition to 'ms/sequest/srf'
27
+ srf.to_mgf # (outputs a file) -> file.mgf
28
+ srf.to_dta # (outputs a dir) -> file
29
+ srf.to_dta("file.tgz", :tgz) # on the fly tgz (requires archive-tar-minitar)
30
+
31
+ Object access (see MS::Sequest::Srf for much more):
32
+
33
+ srf.header # MS::Sequest::Srf::Header object
34
+ srf.params # MS::Sequest::Params object
35
+ srf.dta_files # MS::Sequest::Srf::Dta objects
36
+ srf.out_files # MS::Sequest::Srf::Out objects
37
+ srf.peptide_hits # MS::Sequest::Srf::Out::Peptide objects
38
+
39
+ === MS::Sequest::Params
40
+
41
+ Object or hash access to any parameter in the file. Also provides a unified interface across several versions (3.1 - 3.3)
42
+
43
+ require 'mspire/sequest/params'
44
+ params = Mspire::Sequest::Params.new("sequest.params")
45
+ params.any_existing_param # -> some value or empty string if no value
46
+ params['any_existing_param'] # -> some value or empty string if no value
47
+ params.non_existent_param # -> nil
48
+
49
+ # some unified interface methods:
50
+ params.enzyme # -> enzyme name with no parentheses
51
+ params.database # -> first_database_name
52
+ params.enzyme_specificity # -> [offset, cleave_at, except_if_after]
53
+ params.precursor_mass_type # => "average" | "monoisotopic"
54
+ params.fragment_mass_type # => "average" | "monoisotopic"
55
+
56
+ === MS::Sequest::Sqt
57
+
58
+ sqt = MS::Sequest::Sqt.new("file.sqt")
59
+ sqt.header
60
+ sqt.spectra.each do |spectrum| # an MS::Sequest::Sqt::Spectrum object
61
+ spectrum.matches.each do |match| # an MS::Sequest::Sqt::Match object
62
+ match.loci.each do |locus| # an MS::Sequest::Sqt::Locus object
63
+ end
64
+ end
65
+ end
66
+
67
+ # or more direct access to Match objects:
68
+ sqt.peptide_hits
69
+
70
+ Also reads Percolator SQT output files intelligently:
71
+
72
+ psqt = MS::Sequest::Sqt.new("percolator_output.sqt")
73
+ psqt.peptide_hits.each do |pmatch|
74
+ pmatch.percolator_score == pmatch.xcorr
75
+ pmatch.negative_q_value == pmatch.sp
76
+ pmatch.q_value == -pmatch.negative_q_value
77
+ end
78
+
79
+ == Installation
80
+
81
+ gem install mspire-sequest
82
+
83
+ == Copyright
84
+
85
+ See LICENSE (MIT)
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ require 'jeweler'
5
+ Jeweler::Tasks.new do |gem|
6
+ gem.name = "mspire-sequest"
7
+ gem.homepage = "http://github.com/princelab/mspire-sequest"
8
+ gem.license = "MIT"
9
+ gem.summary = %Q{An mspire library supporting SEQUEST, Bioworks, SQT, etc}
10
+ gem.description = %Q{reads .SRF, .SQT and supports conversions}
11
+ gem.email = "jtprince@gmail.com"
12
+ gem.authors = ["John T. Prince"]
13
+ gem.rubyforge_project = 'mspire'
14
+ gem.add_runtime_dependency "mspire", "~> 0.7.3"
15
+ gem.add_runtime_dependency "trollop", "~> 1.16"
16
+ gem.add_development_dependency "jeweler", "~> 1.5.2"
17
+ gem.add_development_dependency "bio", "~> 1.4.2"
18
+ gem.add_development_dependency "ms-testdata", "= 0.2.1"
19
+ gem.add_development_dependency "rspec", "~> 2.8.0"
20
+ end
21
+ Jeweler::RubygemsDotOrgTasks.new
22
+
23
+ require 'rspec/core/rake_task'
24
+ RSpec::Core::RakeTask.new
25
+
26
+ #require 'rcov/rcovtask'
27
+ #Rcov::RcovTask.new do |spec|
28
+ # spec.libs << 'spec'
29
+ # spec.pattern = 'spec/**/*_spec.rb'
30
+ # spec.verbose = true
31
+ #end
32
+
33
+ task :default => :spec
34
+
35
+ require 'rdoc/task'
36
+ Rake::RDocTask.new do |rdoc|
37
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
38
+ rdoc.rdoc_dir = 'rdoc'
39
+ rdoc.title = "mspire-sequest #{version}"
40
+ rdoc.rdoc_files.include('README*')
41
+ rdoc.rdoc_files.include('lib/**/*.rb')
42
+ end
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.5
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'mspire/sequest/srf/pepxml'
5
+
6
+ Mspire::Sequest::Srf::Pepxml.commandline(ARGV, File.basename(__FILE__))
7
+
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'mspire/sequest/srf/search'
5
+
6
+ Mspire::Sequest::Srf::Search.commandline(ARGV, File.basename(__FILE__))
7
+
data/bin/srf_to_sqt.rb ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'mspire/sequest/srf/sqt'
5
+
6
+ Mspire::Sequest::Srf::Sqt.commandline(ARGV, File.basename(__FILE__))
7
+
8
+
@@ -0,0 +1,331 @@
1
+ require 'mspire/mass/aa'
2
+
3
+ # In the future, this guy should accept any version of bioworks params file
4
+ # and spit out any param queried.
5
+
6
module Mspire ; end
module Mspire::Sequest ; end

# 1) provides a reader and simple parameter lookup for SEQUEST params files
# supporting Bioworks 3.1-3.3.1.
#
#     params = Mspire::Sequest::Params.new("sequest.params") # filename by default
#     params = Mspire::Sequest::Params.new.parse_io(some_io_object)
#
#     params.some_parameter          # => any parameter defined has a method
#     params.nonexistent_parameter   # => nil
#
# Provides consistent behavior between different versions for important info:
#
#     # some basic methods shared by all versions:
#     params.version             # => '3.1' | '3.2' | '3.3'
#     params.enzyme              # => enzyme name with no parentheses
#     params.min_number_termini
#     params.database            # => first_database_name
#     params.enzyme_specificity  # => [offset, cleave_at, except_if_after]
#     params.precursor_mass_type # => "average" | "monoisotopic"
#     params.fragment_mass_type  # => "average" | "monoisotopic"
#
#     # some backwards/forwards compatibility methods:
#     params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
#     params.fragment_ion_tol            # => fragment_ion_tolerance
#
class Mspire::Sequest::Params

  # Bioworks 3.1 stores only an enzyme number; this table maps that number
  # (the array index) to [name, offset, cleave_at, except_if_after].
  Bioworks31_Enzyme_Info_Array = [
    ['No_Enzyme', 0, '-', '-'],                     # 0
    ['Trypsin', 1, 'KR', '-'],                      # 1
    ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],            # 2
    ['Chymotrypsin', 1, 'FWYL', '-'],               # 3
    ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],           # 4
    ['Clostripain', 1, 'R', '-'],                   # 5
    ['Cyanogen_Bromide', 1, 'M', '-'],              # 6
    ['IodosoBenzoate', 1, 'W', '-'],                # 7
    ['Proline_Endopept', 1, 'P', '-'],              # 8
    ['Staph_Protease', 1, 'E', '-'],                # 9
    ['Trypsin_K', 1, 'K', 'P'],                     # 10
    ['Trypsin_R', 1, 'R', 'P'],                     # 11
    ['GluC', 1, 'ED', '-'],                         # 12
    ['LysC', 1, 'K', '-'],                          # 13
    ['AspN', 0, 'D', '-'],                          # 14
    ['Elastase', 1, 'ALIV', 'P'],                   # 15
    ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],   # 16
  ]

  # separates a parameter name from its value ("name = value")
  @@param_re = / = ?/o
  # separates a value from its trailing comment ("value ; comment")
  @@param_two_split = ';'
  # marks the start of the sequest params section
  @@sequest_line = /\[SEQUEST\]/o

  # the general options
  attr_accessor :opts
  # the static weights added to amino acids
  attr_accessor :mods

  # all keys and values stored as strings!
  # will accept a sequest.params file or .srf file
  def initialize(file=nil)
    parse_file(file) if file
  end

  # Reads "name = value ; comment" lines from fh and returns them as a hash
  # of name => value (comment and trailing whitespace removed).
  #
  # Reading stops at the first line that follows the block of add_* lines
  # and does not itself start with add_; fh is rewound so that line can be
  # read again by the caller.  Lines that contain no " = " separator are
  # skipped rather than raising.
  def grab_params(fh)
    params = {}
    add_section_re = /^\s*add_/
    in_add_section = false
    prev_pos = nil
    while (line = fh.gets)
      is_add_line = !(line =~ add_section_re).nil?
      in_add_section ||= is_add_line
      if in_add_section && !is_add_line
        # we ran one line past the add_* block: un-read it and stop
        fh.pos = prev_pos
        break
      end
      prev_pos = fh.pos
      next unless line =~ /\w+/
      # limit of 2 keeps any later " = " as part of the value/comment
      key, value = line.split(@@param_re, 2)
      next if value.nil?  # not a "name = value" line
      # to_s covers a value that is empty before the comment separator
      params[key] = value.split(@@param_two_split, 2).first.to_s.rstrip
    end
    params
  end

  # Locates the sequest params section in fh (a "[SEQUEST]" line directly
  # followed by a first_database_name line) and loads it into #opts and
  # #mods (the add_* entries).
  #
  # returns self or nil if no sequest params were found in the io
  def parse_io(fh)
    # this mimics ruby1.8 behavior as we read in the file
    fh.set_encoding('ASCII-8BIT') if fh.respond_to?(:set_encoding)

    # seek to the SEQUEST section
    loop do
      line = fh.gets
      # reached the end of the file without seeing sequest params
      return nil if line.nil?
      next unless line =~ @@sequest_line
      # double check that we are in a sequest params file:
      pos = fh.pos
      if fh.gets =~ /^first_database_name/
        fh.pos = pos
        break
      end
    end

    # split into general options and mods without mutating a hash while
    # iterating over it
    @opts = {}
    @mods = {}
    grab_params(fh).each do |key, value|
      (key =~ /^add_/ ? @mods : @opts)[key] = value
    end
    @opts["search_engine"] = "SEQUEST"

    ## this gets rid of the .hdr postfix on indexed databases
    db = @opts["first_database_name"]
    @opts["first_database_name"] = db.sub(/\.hdr$/, '') if db
    self
  end

  ## parses file
  ## and drops the .hdr behind indexed fasta files
  ## returns self
  ## can read sequest.params file or .srf file
  def parse_file(file)
    # binary mode: the file may be an .srf, which must not be read in text
    # mode on windows
    File.open(file, 'rb') {|fh| parse_io(fh) }
    self
  end

  # returns( offset, cleave_at, except_if_after )
  # offset is an Integer specifying how far after an amino acid to cut
  # cleave_at is a string of all amino acids that should be cut at
  # except_if_after for not cutting after those
  # normal tryptic behavior would be: [1, 'KR', 'P']
  # NOTE: a '-' in a params file is returned as an '' (empty string)
  # AspN is [0,'D','']
  #
  # raises ArgumentError if the params version cannot be determined
  def enzyme_specificity
    ver = version
    spec =
      if ver == '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
      elsif ver && ver >= '3.2'
        fields = @opts['enzyme_info'].split(/\s+/)[2,3]
        fields[0] = fields[0].to_i
        fields
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    spec.map {|item| item == '-' ? '' : item }
  end

  # Returns the version of the sequest.params file
  # Returns String "3.3" if contains "fragment_ion_units"
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil if none of those parameters is present
  def version
    if @opts['fragment_ion_units'] then '3.3'
    elsif @opts['enzyme_info'] then '3.2'
    elsif @opts['enzyme_number'] then '3.1'
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # returns the partial_sequence parameter, or "0" if it is missing or empty
  # I'm not sure if this is the right mapping for sequence_search_constraint?
  def sequence
    pseq = @opts['partial_sequence']
    (pseq.nil? || pseq == "") ? "0" : pseq
  end

  # returns "average" or "monoisotopic" (from mass_type_parent)
  # aborts the program on any other value
  def precursor_mass_type
    mass_type_name('mass_type_parent')
  end

  # returns "average" or "monoisotopic" (from mass_type_fragment)
  # aborts the program on any other value
  def fragment_mass_type
    mass_type_name('mass_type_fragment')
  end

  # Any parameter read from the file is available as a method of the same
  # name; unknown names return nil.
  def method_missing(name, *args)
    key = name.to_s
    if @opts && @opts.key?(key) then @opts[key]
    elsif @mods && @mods.key?(key) then @mods[key]
    end
  end

  # keeps respond_to? in agreement with method_missing: true for any
  # parameter name that was read from the file
  def respond_to_missing?(name, include_private=false)
    key = name.to_s
    return true if @opts && @opts.key?(key)
    return true if @mods && @mods.key?(key)
    super
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/)[-1]
  end

  # changes the path of the database (the database file name is kept)
  def database_path=(newpath)
    basename = _sys_ind_basename(@opts["first_database_name"])
    @opts["first_database_name"] = File.join(newpath, basename)
  end

  # returns first_database_name
  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table from Mspire::Mass::AA
  # based_on may be :precursor or :fragment (nil is returned for anything else)
  def mass_index(based_on=:precursor)
    mass_type =
      case based_on
      when :precursor then precursor_mass_type
      when :fragment then fragment_mass_type
      end
    case mass_type
    when 'average' then Mspire::Mass::AA::AVG
    when 'monoisotopic' then Mspire::Mass::AA::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # anything else (or no enzyme_info at all) warns and returns "1"
  def min_number_termini
    if (e_info = @opts["enzyme_info"])
      case e_info.split(" ")[1]
      when "1" then return "2"
      when "2" then return "1"
      end
    end
    warn "No Enzyme termini info, using min_number_termini = '1'"
    "1"
  end

  # returns the enzyme name (but no parentheses connected with the name).
  # this will likely be capitalized.
  # the regular expression splits the name and returns the first part (or just
  # the name if not found)
  def enzyme(split_on=/[_\(]/)
    basic_name =
      if version == '3.1'
        Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][0]
      else  # v >= '3.2' applies to all later versions??
        @opts["enzyme_info"]
      end
    name_plus_parenthesis = basic_name.split(' ', 2).first
    name_plus_parenthesis.split(split_on, 2).first
  end

  # backwards/forwards compatible name for max_num_internal_cleavage_sites
  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # my take on peptide_mass_units:
  # (see http://www.ionsource.com/tutorial/isotopes/slide2.htm)
  # amu = atomic mass units = (mass_real - mass_measured).abs (??abs??)
  # mmu = milli mass units (amu / 1000)
  # ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured  [ where ∆m_accuracy = mass_real – mass_measured ]

  # returns peptide_mass_tolerance as read from the file; warns (on stderr)
  # when peptide_mass_units is not "0", since no unit conversion is done
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      warn "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  # backwards/forwards compatible name for fragment_ion_tolerance
  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  # falls back on max_num_differential_per_peptide if the parameter is absent
  def max_num_differential_AA_per_mod
    @opts["max_num_differential_AA_per_mod"] || @opts["max_num_differential_per_peptide"]
  end

  # returns a hash by add_<whatever> of any static mods != 0
  # the values are still as strings
  def static_mods
    @mods.reject {|_name, weight| weight.to_f == 0.0 }
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

  private

  # translates the '0'/'1' flag stored under key into "average" or
  # "monoisotopic"; aborts the program on any other value
  def mass_type_name(key)
    case @opts[key]
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in #{key} in sequest!"
    end
  end

end
331
+