mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
require 'sample_enzyme'
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
##
|
|
5
|
+
# In the future, this guy should accept any version of bioworks params file
|
|
6
|
+
# and spit out any param queried.
|
|
7
|
+
module Sequest ; end

# Reads the [SEQUEST] section of a Bioworks sequest.params file (or of an
# .srf file, which embeds the same text) and exposes the parameters.
#
# After parsing:
# * +opts+ holds every general parameter, keyed by name.
# * +mods+ holds every <tt>add_*</tt> (static modification) parameter.
#
# All keys and values are stored as Strings.  Any parameter can also be read
# by calling a method of the same name (see #method_missing); explicit
# methods are only defined where the name or value differs from what is in
# the params file.
class Sequest::Params

  # Enzyme table for Bioworks 3.1, indexed by the +enzyme_number+ parameter.
  # Each row is [name, offset, cleave_at, except_if_after].
  Bioworks31_Enzyme_Info_Array = [
    ['No_Enzyme', 0, '-', '-'],                     # 0
    ['Trypsin', 1, 'KR', '-'],                      # 1
    ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],            # 2
    ['Chymotrypsin', 1, 'FWYL', '-'],               # 3
    ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],           # 4
    ['Clostripain', 1, 'R', '-'],                   # 5
    ['Cyanogen_Bromide', 1, 'M', '-'],              # 6
    ['IodosoBenzoate', 1, 'W', '-'],                # 7
    ['Proline_Endopept', 1, 'P', '-'],              # 8
    ['Staph_Protease', 1, 'E', '-'],                # 9
    ['Trypsin_K', 1, 'K', 'P'],                     # 10
    ['Trypsin_R', 1, 'R', 'P'],                     # 11
    ['GluC', 1, 'ED', '-'],                         # 12
    ['LysC', 1, 'K', '-'],                          # 13
    ['AspN', 0, 'D', '-'],                          # 14
    ['Elastase', 1, 'ALIV', 'P'],                   # 15
    ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],   # 16
  ]

  # separates a parameter name from its value ("name = value")
  @@param_re = / = ?/
  # separates a value from its trailing comment ("value ; comment")
  @@param_two_split = ';'
  # header line that opens the sequest parameter section
  @@sequest_line = /\[SEQUEST\]/

  # the general options
  attr_accessor :opts
  # the static weights added to amino acids
  attr_accessor :mods

  # all keys and values stored as strings!
  # will accept a sequest.params file or .srf file
  # With no file given, +opts+ and +mods+ stay nil until a parse method runs.
  def initialize(file=nil)
    if file
      parse(file)
    end
  end

  # Reads "name = value ; comment" lines from +fh+ into a hash, continuing
  # through the block of add_* lines and stopping at the first line after
  # that block.  The handle is rewound so that line can be read again by the
  # caller.
  #
  # Lines with no word characters, and lines lacking the " = " separator, are
  # skipped.  Keys are stripped of surrounding whitespace; values have their
  # comment and trailing whitespace removed.
  #
  # returns hash of params up until add_U_user_amino_acid
  def grab_params(fh)
    hash = {}
    in_add_amino_acid_section = false
    add_section_re = /^\s*add_/
    prev_pos = nil
    while line = fh.gets
      is_add_line = (line =~ add_section_re)
      in_add_amino_acid_section = true if is_add_line
      if in_add_amino_acid_section && !is_add_line
        # first line past the add_* block: un-read it and stop
        fh.pos = prev_pos
        break
      end
      prev_pos = fh.pos
      next unless line =~ /\w+/
      # limit of 2 keeps any later " = " inside the value
      key, rest = line.split(@@param_re, 2)
      next if rest.nil?   # no separator: not a parameter line
      hash[key.strip] = rest.split(@@param_two_split)[0].to_s.rstrip
    end
    hash
  end

  # Parses the sequest parameters out of an open, seekable handle.
  #
  # Scans forward to a "[SEQUEST]" line that is immediately followed by a
  # "first_database_name" line, reads the parameters, moves the add_*
  # entries from +opts+ into +mods+, sets opts["search_engine"] and drops a
  # trailing ".hdr" from the database name.
  #
  # Raises ArgumentError if the handle is exhausted without finding the
  # section.
  #
  # returns self
  def parse_handle(fh)
    found = false
    while line = fh.gets
      next unless line =~ @@sequest_line
      # double check that we are in a sequest params file by peeking at the
      # next line; always rewind so the peeked line is never lost
      pos = fh.pos
      peeked = fh.gets
      fh.pos = pos
      if peeked =~ /^first_database_name/
        found = true
        break
      end
    end
    unless found
      raise ArgumentError, "no [SEQUEST] section followed by first_database_name was found"
    end

    @opts = grab_params(fh)
    @opts["search_engine"] = "SEQUEST"

    # extract out the mods (iterate over a snapshot of the keys so the hash
    # is not modified while it is being traversed)
    @mods = {}
    @opts.keys.each do |k|
      @mods[k] = @opts.delete(k) if k =~ /^add_/
    end

    ## this gets rid of the .hdr postfix on indexed databases
    db = @opts["first_database_name"]
    @opts["first_database_name"] = db.sub(/\.hdr$/, '') if db
    self
  end

  ## parses file
  ## and drops the .hdr behind indexed fasta files
  ## returns self
  ## can read sequest.params file or .srf file
  def parse(file)
    File.open(file) do |fh|
      parse_handle(fh)
    end
    self
  end

  # returns( offset, cleave_at, except_if_after )
  # offset is an Integer specifying how far after an amino acid to cut
  # cleave_at is a string of all amino acids that should be cut at
  # except_if_after for not cutting after those
  # normal tryptic behavior would be: [1, 'KR', 'P']
  # NOTE: a '-' in a params file is returned as an '' (empty string)
  # AspN is [0,'D','']
  #
  # Raises ArgumentError when the params version cannot be determined.
  def enzyme_specificity
    v = version
    enzyme_ar =
      if v == '3.1'
        bioworks31_enzyme_entry[1,3]
      elsif v && v >= '3.2'
        # enzyme_info looks like: "Trypsin(KR) 1 1 KR -"
        arr = @opts['enzyme_info'].split(' ')[2,3]
        arr[0] = arr[0].to_i
        arr
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    enzyme_ar.map {|str| str == '-' ? '' : str }
  end

  # Returns the version of the sequest.params file
  # Returns String "3.3" if contains "fragment_ion_units"
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil if none of those parameters is present
  def version
    if @opts['fragment_ion_units'] ; '3.3'
    elsif @opts['enzyme_info'] ; '3.2'
    elsif @opts['enzyme_number'] ; '3.1'
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # Returns the partial_sequence parameter, or "0" when it is missing or
  # empty.
  # I'm not sure if this is the right mapping for sequence_search_constraint?
  def sequence
    pseq = @opts['partial_sequence']
    if !pseq || pseq == "" ; pseq = "0" end
    pseq
  end

  # Returns "average" or "monoisotopic" according to mass_type_parent
  # ('0' or '1').  Aborts the process on any other value.
  def precursor_mass_type
    case @opts['mass_type_parent']
    when '0' ; "average"
    when '1' ; "monoisotopic"
    else ; abort "error in mass_type_parent in sequest!"
    end
  end

  # Returns "average" or "monoisotopic" according to mass_type_fragment
  # ('0' or '1').  Aborts the process on any other value.
  def fragment_mass_type
    case @opts['mass_type_fragment']
    when '0' ; "average"
    when '1' ; "monoisotopic"
    else ; abort "error in mass_type_fragment in sequest!"
    end
  end

  # Looks an unknown method name up as a parameter: first in +opts+, then in
  # +mods+.  Returns nil when the name is in neither (or nothing has been
  # parsed yet).
  def method_missing(name, *args)
    string = name.to_s
    [@opts, @mods].each do |table|
      return table[string] if table && table.key?(string)
    end
    nil
  end

  # Keeps respond_to? in agreement with #method_missing for parameter names
  # that are actually present.
  def respond_to_missing?(name, include_private = false)
    string = name.to_s
    [@opts, @mods].any? {|table| table && table.key?(string) } || super
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/)[-1]
  end

  # changes the path of the database
  # (keeps the database's basename and joins it onto +newpath+)
  def database_path=(newpath)
    db = @opts["first_database_name"]
    newpath = File.join(newpath, _sys_ind_basename(db))
    @opts["first_database_name"] = newpath
  end

  # Returns the first_database_name parameter.
  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table (in spec_id.rb SpecID::MONO or
  # SpecID::AVG based on precursor_mass_type
  def mass_table
    case precursor_mass_type
    when 'average'
      SpecID::AVG
    when 'monoisotopic'
      SpecID::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # Anything else (including a file with no enzyme_info at all) prints a
  # warning and returns "1".
  def min_number_termini
    info = @opts["enzyme_info"]
    termini_number = info && info.split(" ")[1]
    if termini_number == "1"
      "2"
    elsif termini_number == "2"
      "1"
    else
      puts "WARNING: Enzyme termini info might be imprecise!"
      "1"
    end
  end

  # returns a SampleEnzyme object
  def sample_enzyme
    offset, cleave_at, except_if_after = enzyme_specificity.map do |v|
      v == '' ? nil : v
    end
    SampleEnzyme.new do |se|
      se.name = self.enzyme
      se.cut = cleave_at
      se.no_cut = except_if_after
      se.sense =
        if se.name == "No_Enzyme"
          nil
        elsif offset == 1
          'C'
        elsif offset == 0
          'N'
        end
    end
  end

  # returns the enzyme name (but no parentheses connected with the name).
  # this will likely be capitalized.
  #
  # For Bioworks 3.2+ the name is the first whitespace-delimited token of
  # enzyme_info, so "No_Enzyme 0 0 - -" yields "No_Enzyme" and
  # "Trypsin(KR) 1 1 KR -" yields "Trypsin".
  #
  # Raises ArgumentError when the params version cannot be determined.
  def enzyme
    v = self.version
    basic_name =
      if v == '3.1'
        bioworks31_enzyme_entry[0]
      elsif v && v >= '3.2'
        @opts["enzyme_info"]
      else
        raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
      end
    basic_name.split(' ')[0].split('(')[0]
  end

  # Returns the max_num_internal_cleavage_sites parameter.
  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # my take on peptide_mass_units:
  # (see http://www.ionsource.com/tutorial/isotopes/slide2.htm)
  # amu = atomic mass units = (mass_real - mass_measured).abs (??abs??)
  # mmu = milli mass units (amu / 1000)
  # ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured  [ where ∆m_accuracy = mass_real – mass_measured ]

  # Returns the peptide_mass_tolerance parameter unchanged; prints a warning
  # when peptide_mass_units is anything other than "0" since no unit
  # conversion is done.
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      puts "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  # Returns the fragment_ion_tolerance parameter.
  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  # Returns the max_num_differential_per_peptide parameter.
  def max_num_differential_AA_per_mod
    @opts["max_num_differential_per_peptide"]
  end

  # returns a hash by add_<whatever> of any static mods != 0
  # the values are still as strings
  def static_mods
    hash = {}
    @mods.each do |k,v|
      hash[k] = v if v.to_f != 0.0
    end
    hash
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

  private

  # Row of Bioworks31_Enzyme_Info_Array selected by the enzyme_number
  # parameter; raises ArgumentError when the number is outside the table.
  def bioworks31_enzyme_entry
    entry = Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i]
    if entry.nil?
      raise ArgumentError, "unknown Bioworks 3.1 enzyme_number: #{@opts['enzyme_number'].inspect}"
    end
    entry
  end

end
|
|
316
|
+
|