mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/spec_id.rb
DELETED
@@ -1,778 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require 'set'
|
3
|
-
require 'hash_by'
|
4
|
-
require 'roc'
|
5
|
-
require 'sample_enzyme' # for others
|
6
|
-
require 'spec_id/bioworks'
|
7
|
-
require 'spec_id/sequest'
|
8
|
-
|
9
|
-
require 'spec_id/proph/prot_summary'
|
10
|
-
require 'spec_id/proph/pep_summary'
|
11
|
-
|
12
|
-
require 'spec_id_xml'
|
13
|
-
require 'spec_id/sqt'
|
14
|
-
require 'spec_id/mass'
|
15
|
-
require 'fasta'
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
module ProteinReferenceable ; end # empty declaration here; SpecID::Prot (later in this file) includes it
|
20
|
-
|
21
|
-
class SampleEnzyme ; end # empty class body; NOTE(review): 'sample_enzyme' is required above, so this presumably just ensures the constant exists
|
22
|
-
|
23
|
-
|
24
|
-
module SpecID ; end # declared empty first so GenericSpecID can include it; the module body is defined further down
|
25
|
-
|
26
|
-
class GenericSpecID ; include SpecID ; end # plain SpecID carrier; SpecID.new returns one when no file is given
|
27
|
-
|
28
|
-
module SpecID
|
29
|
-
MONO = Mass::MONO
|
30
|
-
AVG = Mass::AVG
|
31
|
-
|
32
|
-
attr_accessor :peps, :prots
|
33
|
-
# True if a high protein/peptide score is better than low, false otherwise
|
34
|
-
# This is set automatically for known file types
|
35
|
-
attr_accessor :hi_prob_best
|
36
|
-
|
37
|
-
# A relative pathname of the file the specid object is derived from
|
38
|
-
attr_accessor :filename
|
39
|
-
|
40
|
-
# tp = file_type
|
41
|
-
# Will return a SpecID object (really, the object corresponding to the
|
42
|
-
# file type which mixes in SpecID [is_a?(SpecID) == true])
|
43
|
-
# If no file is given, will return a GenericSpecID object.
|
44
|
-
# If file is an array, this is assumed to be a group of srf files which is
|
45
|
-
# converted into an SRFGroup Object and run.
|
46
|
-
# Factory entry point.
#   file - nil, a single filename, or an Array of srf filenames
#   tp   - optional file type string, passed through to from_file
# An Array goes straight to SRFGroup, a single file is dispatched through
# from_file, and no file at all yields an empty GenericSpecID.
def self.new(file=nil, tp=nil)
  # an array is taken to be a list of srf filenames
  return SRFGroup.new(file) if file.is_a?(Array)
  return from_file(file, tp) if file
  GenericSpecID.new
end
|
57
|
-
|
58
|
-
# tp = file_type
|
59
|
-
# a single srf file will be packaged into an SRFGroup object
|
60
|
-
# Builds the object that corresponds to a file's type.
#   file - filename handed to the chosen constructor
#   tp   - file type string; looked up with file_type(file) when not given
# Single 'srf' and 'sqt' files are wrapped in a one-element array before
# being handed to their group class. An unrecognized type aborts the
# process with a message.
def self.from_file(file, tp=nil)
  tp ||= file_type(file)
  case tp
  when 'srf'       then SRFGroup.new([file])
  when 'srg'       then SRFGroup.new(file)
  when 'bioworks'  then Bioworks.new(file)
  when 'protproph' then Proph::ProtSummary.new(file)
  when 'pepproph'  then Proph::PepSummary.new(file)
  when 'sqg'       then SQTGroup.new(file)
  when 'sqt'       then SQTGroup.new([file])
  else
    abort "UNRECOGNIZED file type for #{file}"
  end
end
|
89
|
-
|
90
|
-
# Short description: the class name plus the number of peptides
# (or "peps=(nil)" when peps has not been set).
def inspect
  summary = peps ? "peps(#)=#{peps.size}" : "peps=(nil)"
  "<#{self.class} #{summary}>"
end
|
99
|
-
|
100
|
-
# given some list of SpecID::Pep based objects, returns the list of proteins
|
101
|
-
# associated with those peptides
|
102
|
-
# kind must be a symbol:
|
103
|
-
# :no_update (current proteins are returned, but their peps attribute
|
104
|
-
# is not updated)
|
105
|
-
# :update (current proteins returned with peps attribute updated)
|
106
|
-
# :new (new proteins are created complete with peps attribute)
|
107
|
-
# Collects the proteins referenced by the given peptide hits, one per
# distinct protein reference (the last protein seen for a reference wins).
#   pephits - objects responding to prots / prots=
#   kind    - :no_update  proteins returned as they are
#             :update     each protein's peps is reset and refilled from pephits
#             :new        proteins are dup'ed, given fresh peps arrays, and the
#                         hits get their original prots arrays back afterwards
# Returns an array of proteins.
def self.protein_list(pephits, kind=:no_update)
  link_peps = (kind == :update || kind == :new)
  saved_prots = []

  if kind == :new
    copies = {}
    pephits.each_with_index do |hit, idx|
      saved_prots[idx] = hit.prots
      # temporarily point the hit at the copies so the linking pass below
      # fills the copies' peps instead of the originals'
      hit.prots = hit.prots.map do |prot|
        copies.fetch(prot.reference) do
          copy = prot.dup
          copy.peps = []
          copies[copy.reference] = copy
        end
      end
    end
  end

  if kind == :update
    pephits.each do |hit|
      hit.prots.each { |prot| prot.peps = [] }
    end
  end

  by_reference = {}
  pephits.each do |hit|
    current = hit.prots
    current.each { |prot| by_reference[prot.reference] = prot }
    current.each { |prot| prot.peps << hit } if link_peps
  end

  # put the original protein arrays back on the hits
  if kind == :new
    pephits.each_with_index { |hit, idx| hit.prots = saved_prots[idx] }
  end

  by_reference.values
end
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
# takes a comma separated list or array and extends the last to create an
|
162
|
-
# array of desired size
|
163
|
-
# Pads (or truncates) a list to desired_size entries by repeating the most
# recent truthy value.
#   arg          - an array, or a comma separated String (split on ',')
#   desired_size - number of entries wanted
# Returns a new array of desired_size elements.
def self.extend_args(arg, desired_size)
  values = arg.is_a?(String) ? arg.split(',') : arg
  carry = values[0]
  desired_size.times.map do |idx|
    carry = values[idx] if values[idx]
    carry
  end
end
|
180
|
-
|
181
|
-
# takes an array of proteins, each having peps
|
182
|
-
# peptide grouping is done
|
183
|
-
# by-
|
184
|
-
# the protein with the most unique peptides ends up taking any
|
185
|
-
# degenerate peptides, tie goes to one with most hits total, then the one
|
186
|
-
# that had the top xcorr(s) (before removing any peptides). All other
|
187
|
-
# proteins with identical peptides will lose those peptides. So, the rich
|
188
|
-
# stay rich, and the poor get poorer.
|
189
|
-
# returns an array of triplets where each is [prot, pep_hits,
|
190
|
-
# uniq_aaseqs] (uniq_aaseqs is an array) where the protein contains >= 1
|
191
|
-
# peptide. The internal links (prot.peps and pep.prots) is NOT modified!!
|
192
|
-
# update_prots == true will set each protein with the peptides found
|
193
|
-
# Assigns each distinct peptide sequence (aaseq) to exactly one protein.
# Proteins are visited from "richest" to "poorest": most unique aaseqs,
# then most peptide hits, then highest sorted xcorr list, then the protein's
# own <=>. Each protein keeps only the aaseqs no earlier protein claimed.
#
#   array_of_prots - proteins responding to peps (peptides respond to aaseq
#                    and xcorr)
#   update_prots   - when true, each reported protein's peps is replaced by
#                    the hits it was assigned
#
# Returns an array of [prot, pep_hits, uniq_aaseqs] triplets for proteins
# left with at least one aaseq.
#
# The ranking key stops at the protein. Including the aaseq Set and the
# lookup hash in the compared array (as before) could make the sort fail
# when two proteins tie on every earlier field.
def self.occams_razor(array_of_prots, update_prots=false)
  claimed = Set.new

  # gather everything up front so update_prots cannot affect later entries
  candidates = array_of_prots.map do |prot|
    hits = prot.peps
    { :prot     => prot,
      :aaseqs   => Set.new(hits.map { |pep| pep.aaseq }),
      # presumably aaseq => array of hits (the result is splatted below)
      :by_aaseq => hits.hash_by(:aaseq),
      :num_hits => hits.size,
      :xcorrs   => hits.map { |pep| pep.xcorr }.sort }
  end

  ranked = candidates.sort_by do |c|
    [c[:aaseqs].size, c[:num_hits], c[:xcorrs], c[:prot]]
  end.reverse

  triplets = []
  ranked.each do |c|
    unclaimed = c[:aaseqs] - claimed
    next if unclaimed.size == 0

    claimed.merge(unclaimed)
    pep_hits = []
    unclaimed.each { |seq| pep_hits.push(*(c[:by_aaseq][seq])) }
    triplets << [c[:prot], pep_hits, unclaimed.to_a]
    c[:prot].peps = pep_hits if update_prots
  end
  triplets
end
|
227
|
-
|
228
|
-
# returns number of true positives (array) and the specified output (as
|
229
|
-
# parallel array). Requires the classification method and a sorted array of
|
230
|
-
# tp values and an array of fp values.
|
231
|
-
# (This is simply a wrapper around ROC#by_tps method!)
|
232
|
-
# Thin wrapper: delegates to by_tps on a freshly created ROC object and
# returns whatever that call returns.
def by_tps(classification_method, tp, fp)
  roc = ROC.new
  roc.by_tps(classification_method, tp, fp)
end
|
235
|
-
|
236
|
-
# from the unique set of peptide hits, create a separate peptide hit for
|
237
|
-
# each protein reference where that peptide only references that protein
|
238
|
-
# e.g. pep.prots = [(a single protein)]
|
239
|
-
# Expands peps into one peptide hit per (peptide, protein) pair: every
# returned hit is a dup of the original whose prots holds a single protein.
#
# The previous code threw away the result of pep.dup, so it overwrote the
# original peptide's prots and pushed that same object once per protein.
# The originals in peps are now left untouched.
#
# Returns an array of duplicated peptide hits.
def pep_prots
  expanded = []
  peps.each do |pep|
    pep.prots.each do |prt|
      single = pep.dup
      single.prots = [prt]
      expanded << single
    end
  end
  expanded
end
|
250
|
-
|
251
|
-
# True when the first element of ar is a SpecID::Prot (only the first
# element is inspected; an empty array gives false).
def self.prots?(ar)
  SpecID::Prot === ar.first
end
|
254
|
-
|
255
|
-
# True when the first element of ar is a SpecID::Pep (only the first
# element is inspected; an empty array gives false).
def self.peps?(ar)
  SpecID::Pep === ar.first
end
|
258
|
-
|
259
|
-
# for older stuff
|
260
|
-
# Kept for older callers. Resolves items (:prots or :peps) to this object's
# corresponding collection and hands it to SpecID.classify_by_prot.
# Any other value for items passes nil along.
def classify_by_regex(items, regex, decoy_on_match=true, ties=:both)
  collection =
    if :prots === items
      prots
    elsif :peps === items
      peps
    end
  SpecID.classify_by_prot(collection, regex, decoy_on_match, ties)
end
|
270
|
-
|
271
|
-
# includes the peptide hit in both
|
272
|
-
# returns (target, decoy)
|
273
|
-
# (for peps) ties can be :both, true (target wins), false (decoy wins)
|
274
|
-
# regardless of ties behavior, will partition out the proteins to be
|
275
|
-
# appropriate for the peptide
|
276
|
-
# Splits proteins or peptide hits into (target, decoy) by matching protein
# references against regex.
#
#   items          - array of SpecID::Prot or SpecID::Pep objects (decided by
#                    the first element)
#   regex          - matched against each protein's reference
#   decoy_on_match - true: a matching reference is a decoy; false: a target
#   ties           - peptides only, for a hit that has both matching and
#                    non-matching proteins: :both (report on both sides),
#                    true (target wins), false (decoy wins)
#
# Returns [target, decoy]. Raises ArgumentError for an unknown ties value or
# when items holds neither proteins nor peptides.
#
# For a peptide with proteins on both sides, the hit's prots is narrowed to
# the matching proteins and a dup carrying the non-matching proteins is
# created, so each reported hit only points at its own kind of protein.
#
# Fixes relative to the earlier version:
#  * the protein branch called the instance method `classify` (which expects
#    a symbol) from this module-level method; it now partitions directly
#  * the non-matching side received the hit holding the *matching* proteins;
#    it now receives the dup that holds the non-matching proteins
def self.classify_by_prot(items, regex, decoy_on_match=true, ties=:both)
  return [[], []] if items.size == 0

  if prots?(items)
    match, nomatch = items.partition { |prt| prt.reference =~ regex }
  elsif peps?(items)
    match = [] ; nomatch = []
    items.each do |pep|
      match_prots, nomatch_prots = pep.prots.partition do |prot|
        prot.reference =~ regex
      end
      if match_prots.size == 0
        nomatch << pep
      elsif nomatch_prots.size == 0
        match << pep
      else ## both have hits
        pep.prots = match_prots
        nomatch_pep = pep.dup
        nomatch_pep.prots = nomatch_prots

        # resolve ties
        case ties
        when :both
          match << pep
          nomatch << nomatch_pep
        when true, false
          # the target side is the non-matching side when decoy_on_match
          to_nomatch = decoy_on_match ? ties : !ties
          if to_nomatch
            nomatch << nomatch_pep
          else
            match << pep
          end
        else
          raise ArgumentError
        end
      end
    end
  else
    raise ArgumentError, "arg1 is ar of objects descended from SpecID::Prot/Pep"
  end

  decoy_on_match ? [nomatch, match] : [match, nomatch]
end
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
# returns [tp, fp] based on the protein prefix for items where items =
|
334
|
-
# (:prot|:peps)
|
335
|
-
# this may result in a duplication of some peptides if they match both
|
336
|
-
# normal and decoy proteins. In this case, the protein arrays are split,
|
337
|
-
# too, so that each points only to its breed of protein.
|
338
|
-
# Builds a regex from a literal decoy flag and delegates to
# classify_by_regex (ties is left at that method's default).
#   items          - :prots or :peps
#   flag           - literal text marking decoys (escaped before use)
#   decoy_on_match - passed through unchanged
#   prefix         - true anchors the flag with ^
def classify_by_decoy_flag(items, flag, decoy_on_match=true, prefix=false)
  escaped = Regexp.escape(flag)
  pattern = prefix ? /^#{escaped}/ : /#{escaped}/
  classify_by_regex(items, pattern, decoy_on_match)
end
|
346
|
-
|
347
|
-
# Returns (match, nomatch)
|
348
|
-
# items = symbol (:prots, :peps)
|
349
|
-
# Returns two arrays, those returning true from classify_item_by and those
|
350
|
-
# returning false
|
351
|
-
# Splits one of this object's collections in two.
#   items            - name of the collection method to call (:prots, :peps)
#   classify_item_by - callable; a truthy result puts the item in the first array
# Returns [items judged true, items judged false].
def classify(items, classify_item_by)
  send(items).partition { |item| classify_item_by.call(item) }
end
|
363
|
-
|
364
|
-
# returns two arrays, true positives and false positives (determined by proc
|
365
|
-
# classify_item_by) sorted by proc rank_item_by. Items will be ranked from
|
366
|
-
# lowest to highest based on the return value of rank_item_by. items is a
|
367
|
-
# symbol (:prots or :peps)
|
368
|
-
# Pairs every item's rank value with its classification and lets ROC split
# the pairs.
#   items            - name of the collection method to call (:prots or :peps)
#   rank_item_by     - callable giving the value an item is ranked by
#   classify_item_by - callable giving the item's classification
# Returns the two values produced by ROC#doublets_to_separate (tp, fp).
def rank_and_classify(items, rank_item_by, classify_item_by)
  scored = send(items).collect do |entry|
    [rank_item_by.call(entry), classify_item_by.call(entry)]
  end
  positives, negatives = ROC.new.doublets_to_separate(scored)
  return positives, negatives
end
|
379
|
-
|
380
|
-
|
381
|
-
# returns a proc for getting all probabilities so that an ascending sort
|
382
|
-
# will put the best scores first
|
383
|
-
# Returns a proc that extracts a sortable probability from an item: the raw
# probability, or its negation when hi_prob_best is set, so an ascending
# sort puts the best scores first.
def probability_proc
  return proc { |hit| hit.probability } unless hi_prob_best
  proc { |hit| hit.probability * -1 }
end
|
391
|
-
|
392
|
-
# Unfinished: calls the collection method named by items, then returns nil
# for :prots and aborts the process for anything else. fp_prefix is not
# used yet.
def separate_by_prefix(items, fp_prefix)
  send(items)
  return nil if items == :prots
  abort "not implemented yet" if items == :peps
  abort "no other items recognized yet"
end
|
402
|
-
|
403
|
-
# sorts the probabilities and then
|
404
|
-
# calcs predicted number hits and precision for protein probabilities
|
405
|
-
# (summing probabilities)
|
406
|
-
# one_minus_ppv = SUM(1-probX)/#prots = what is commonly and mistakenly
|
407
|
-
# called false positive rate
|
408
|
-
# SUM(1-probX)/#prots
|
409
|
-
# Walks the protein probabilities from highest to lowest and, at each
# position, records the number of proteins so far and
# 1 - (SUM(1 - prob) / number of proteins so far).
# Returns [num_prots, ppv] as parallel arrays.
def num_hits_and_ppv_for_protein_prophet_probabilities
  descending = prots.map { |prot| prot.probability }.sort.reverse
  running_miss = 0.0
  counts = []
  precisions = []
  descending.each_with_index do |prob, idx|
    seen = idx + 1
    counts << seen
    running_miss += 1.0 - prob
    precisions << 1.0 - (running_miss / seen)
  end
  [counts, precisions]
end
|
425
|
-
|
426
|
-
# convenience method for the common task of determining precision for
|
427
|
-
# proteins (with decoy proteins found by false_flag)
|
428
|
-
# returns (num_hits, precision)
|
429
|
-
# Convenience wrapper for protein precision against decoys.
#   false_flag - literal text marking decoy references (escaped before use)
#   prefix     - true anchors the flag with ^
# Proteins whose reference matches the flag count as decoys. Ranking uses
# probability_proc, and the split hits go to DecoyROC#pred_and_tps_and_ppv.
# Returns [num_hits, precision].
def num_hits_and_ppv_for_prob(false_flag, prefix=false)
  escaped = Regexp.escape(false_flag)
  decoy_re = prefix ? /^#{escaped}/ : /#{escaped}/
  ranker = probability_proc
  is_real = proc { |prt| (prt.reference =~ decoy_re) ? false : true }

  real_hits, decoy_hits = rank_and_classify(:prots, ranker, is_real)

  num_hits, _num_tps, precision = DecoyROC.new.pred_and_tps_and_ppv(real_hits, decoy_hits)
  [num_hits, precision]
end
|
446
|
-
|
447
|
-
# # takes the existing spec_id object and marshals it into "file.msh"
|
448
|
-
# # a new file will always look for a file.msh to load
|
449
|
-
# def marshal(force=false)
|
450
|
-
# if !(File.exist? @marshal_file)| force
|
451
|
-
# File.open(@marshal_file, 'w') {|out| Marshal.dump(@obj, out) }
|
452
|
-
# end
|
453
|
-
# end
|
454
|
-
|
455
|
-
# Returns 'bioworks' if bioworks xml, 'protproph' if Protein prophet
|
456
|
-
# 'srf' if SRF file, 'srg' if search results group file.
|
457
|
-
# Guesses the type of a search-result file.
#
# Checks, in order:
#   * filename ending in .srg / .sqg          -> 'srg' / 'sqg'
#   * the 7 bytes at offset 438 are 'Enzyme:' -> 'srf'
#   * first 8 lines contain <bioworksinfo>    -> 'bioworks'
#   * first 8 lines contain <protein_summary and match one of the
#     Proph::ProtSummary filetype/version regexes -> 'protproph'
#   * first 8 lines contain <msms_pipeline_analysis followed by
#     <peptideprophet_summary                 -> 'pepproph'
#   * the first 208 lines contain both "H\tDatabase" and "H\tSQTGenerator"
#     header lines                            -> 'sqt'
#
# Returns the type string, or nil when nothing matches.
#
# Files shorter than 8 lines used to raise EOFError from readline; the
# header is now read with gets and reading stops at end of file.
def self.file_type(file)
  return 'srg' if file =~ /\.srg$/
  return 'sqg' if file =~ /\.sqg$/
  return 'srf' if IO.read(file, 7, 438) == 'Enzyme:'

  File.open(file) do |fh|
    lines = "".dup
    8.times do
      line = fh.gets
      break unless line
      lines << line
    end

    if lines =~ /<bioworksinfo>/
      return 'bioworks'
    elsif (lines =~ /<protein_summary/) &&
          ((lines =~ Proph::ProtSummary::Filetype_and_version_re_old) ||
           (lines =~ Proph::ProtSummary::Filetype_and_version_re_new))
      return 'protproph'
    elsif lines =~ /<msms_pipeline_analysis.*<peptideprophet_summary/m
      return 'pepproph'
    end

    # assumes the header of a sqt file is less than 200 lines ...
    200.times do
      line = fh.gets
      break unless line
      lines << line
    end
    return 'sqt' if lines =~ /^H\tDatabase/ && lines =~ /^H\tSQTGenerator/
  end
  nil
end
|
489
|
-
|
490
|
-
|
491
|
-
##############################################
|
492
|
-
# These are pretty specific to Smriti's needs:
|
493
|
-
|
494
|
-
# Given a hash of peptide arrays by some attribute key
|
495
|
-
# Return two sorted arrays of sorted probabilities
|
496
|
-
# The first of the min and second of the best 10 of each peptide array
|
497
|
-
# Given a hash whose values are arrays of peptides, returns two sorted
# probability lists (each produced by sorted_probabilities):
#   1. from the peptide with the lowest peptide_probability in each array
#   2. from the (up to) ten lowest-peptide_probability peptides of each array
def min_and_best10(hash)
  lowest_per_group = hash.collect do |_key, group|
    group.min { |a, b| a.peptide_probability <=> b.peptide_probability }
  end
  min_sorted = sorted_probabilities(lowest_per_group)

  best_tens = []
  hash.each do |_key, group|
    best_tens.concat(group.sort_by { |pep| pep.peptide_probability }.slice(0, 10))
  end
  tens_sorted = sorted_probabilities(best_tens)

  return min_sorted, tens_sorted
end
|
520
|
-
|
521
|
-
# Returns a list of sorted probabilities given the array of peptides
|
522
|
-
# Collects the probability of every peptide and returns the values sorted
# in ascending order.
def sorted_probabilities(peptides)
  probabilities = peptides.map { |pep| pep.probability }
  probabilities.sort
end
|
529
|
-
|
530
|
-
# returns a sorted list of probabilities based on all pepprots (a peptide
|
531
|
-
# associated with a protein)
|
532
|
-
# Sorted probabilities of everything in peps.
# NOTE(review): the name suggests pep_prots, but peps is what is used here.
def pep_probs_by_pep_prots
  all_hits = peps
  sorted_probabilities(all_hits)
end
|
535
|
-
|
536
|
-
##########################################################################
|
537
|
-
# WARNING! These might be dangerous to your health if there are multiple
|
538
|
-
# files collected in your bioworks file
|
539
|
-
##########################################################################
|
540
|
-
|
541
|
-
# (prob_list_by_min, prob_list_by_best10)
|
542
|
-
# returns 2 sorted lists of probabilities based on:
|
543
|
-
# 1. best peptide hit
|
544
|
-
# 2. top 10 peptide hits
|
545
|
-
# on a per scan basis
|
546
|
-
# NOTE: you may want to hash on base_name first!
|
547
|
-
# Groups peps by (first_scan, last_scan) and returns the two sorted
# probability lists from min_and_best10 for those groups.
def pep_probs_by_scan
  by_scan = peps.hash_by(:first_scan, :last_scan)
  min_and_best10(by_scan)
end
|
551
|
-
|
552
|
-
|
553
|
-
#(prob_list_by_min, prob_list_by_best10)
|
554
|
-
# same as pep_probs_by_scan but per charge state
|
555
|
-
# NOTE: you may want to hash on base_name first!
|
556
|
-
# Groups peps by (first_scan, last_scan, charge) and returns the two sorted
# probability lists from min_and_best10 for those groups.
def pep_probs_by_scan_charge
  by_scan_charge = peps.hash_by(:first_scan, :last_scan, :charge)
  min_and_best10(by_scan_charge)
end
|
560
|
-
|
561
|
-
# (prob_list_by_min)
|
562
|
-
# hashes on seq-charge and returns the sorted list of probabilities of top
|
563
|
-
# hit per seq-charge
|
564
|
-
# NOTE: you may want to hash on base_name first!
|
565
|
-
# Groups peps by (sequence, charge), keeps the peptide with the lowest
# peptide_probability from each group, and returns the sorted probabilities
# of those peptides.
def pep_probs_by_seq_charge
  groups = peps.hash_by(:sequence, :charge)
  lowest = groups.collect do |_key, members|
    members.min { |a, b| a.peptide_probability <=> b.peptide_probability }
  end
  sorted_probabilities(lowest)
end
|
572
|
-
|
573
|
-
##########################################################################
|
574
|
-
# USE these if you have multiple files in your bioworks.xml file
|
575
|
-
##########################################################################
|
576
|
-
# (prob_list_by_min, prob_list_by_best10)
|
577
|
-
# returns 2 sorted lists of probabilities based on:
|
578
|
-
# 1. best peptide hit
|
579
|
-
# 2. top 10 peptide hits
|
580
|
-
# on a per scan basis
|
581
|
-
# NOTE: you may want to hash on base_name first!
|
582
|
-
# Groups peps by (base_name, first_scan, last_scan) and returns the two
# sorted probability lists from min_and_best10 for those groups.
def pep_probs_by_bn_scan
  by_run_and_scan = peps.hash_by(:base_name, :first_scan, :last_scan)
  min_and_best10(by_run_and_scan)
end
|
586
|
-
|
587
|
-
|
588
|
-
#(prob_list_by_min, prob_list_by_best10)
|
589
|
-
# same as pep_probs_by_scan but per charge state
|
590
|
-
# NOTE: you may want to hash on base_name first!
|
591
|
-
# Groups peps by (base_name, first_scan, last_scan, charge) and returns the
# two sorted probability lists from min_and_best10 for those groups.
def pep_probs_by_bn_scan_charge
  by_run_scan_charge = peps.hash_by(:base_name, :first_scan, :last_scan, :charge)
  min_and_best10(by_run_scan_charge)
end
|
595
|
-
|
596
|
-
# (prob_list_by_min)
|
597
|
-
# hashes on seq-charge and returns the sorted list of probabilities of top
|
598
|
-
# hit per seq-charge
|
599
|
-
# NOTE: you may want to hash on base_name first!
|
600
|
-
def pep_probs_by_bn_seq_charge
  # one group of hits per (base_name, sequence, charge) combination
  hits_by_bn_seq_charge = peps.hash_by(:base_name, :sequence, :charge)
  # from every group keep the hit with the smallest peptide_probability
  top_hits = hits_by_bn_seq_charge.map do |_key, group|
    group.min { |x, y| x.peptide_probability <=> y.peptide_probability }
  end
  sorted_probabilities(top_hits)
end
|
607
|
-
end
|
608
|
-
|
609
|
-
# A Generic spectraID protein
|
610
|
-
module SpecID::Prot
  include ProteinReferenceable

  # probability is always a float!
  attr_accessor :probability, :reference, :peps

  # Proteins are ordered by comparing their references.
  def <=>(other)
    reference <=> other.reference
  end

  # One-line summary: class, probability, reference and -- only when peps
  # is set -- the number of peps.
  def inspect
    # stays nil (interpolates as an empty string) when there are no peps
    pep_count = peps ? ", @peps(#)=#{peps.size}" : nil
    "<#{self.class} @probability=#{probability}, @reference=#{reference}#{pep_count}>"
  end
end
|
629
|
-
|
630
|
-
module SpecID::Pep

  # Matches every character that is NOT an upper-case letter, '.' or '-'.
  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/

  attr_accessor :prots
  attr_accessor :probability
  # full sequence: (<firstAA>.<sequence>.<last>) with '-' for no first
  # or last.
  attr_accessor :sequence

  # the basic amino acid sequence (no leading or trailing '.' or amino acids)
  # should not contain any special symbols, etc.
  attr_accessor :aaseq
  attr_accessor :charge

  # Returns a copy of +sequence+ with every character matched by
  # Non_standard_amino_acid_char_re removed (A-Z, '.' and '-' survive).
  def self.remove_non_amino_acids(sequence)
    sequence.gsub(Non_standard_amino_acid_char_re, '')
  end

  # remove_non_amino_acids followed by split_sequence; returns whatever
  # split_sequence returns for the cleaned string.
  def self.prepare_sequence(val)
    split_sequence(remove_non_amino_acids(val))
  end

  # Peptides are ordered by comparing their aaseq.
  def <=>(other)
    aaseq <=> other.aaseq
  end

  # Splits +val+ on '.' and returns [prev, peptide, next].
  #   R.PEPTIDE.A   # -> R, PEPTIDE, A
  #   R.PEPTIDE.-   # -> R, PEPTIDE, -
  #   PEPTIDE.A     # -> -, PEPTIDE, A
  #   A.PEPTIDE     # -> A, PEPTIDE, -
  #   PEPTIDE       # -> nil,nil,nil
  #   A.B.C.D       # -> nil,nil,nil
  # Any piece count other than 2 or 3 is treated as a parse error and
  # yields nil,nil,nil. (Four or more pieces used to fall through the
  # case statement and return three empty strings, contradicting the
  # documented contract.)
  def self.split_sequence(val)
    pieces = val.split('.')
    case pieces.size
    when 3
      prev_aa, peptide, next_aa = pieces
      [prev_aa, peptide, next_aa]
    when 2
      first, second = pieces
      if first.size > 1   ## N termini: the long piece comes first
        ['-', first, second]
      else                ## C termini: a single flanking residue comes first
        [first, second, '-']
      end
    else                  ## parse error
      [nil, nil, nil]
    end
  end

  # Returns only the peptide part of +sequence+ after stripping the
  # characters matched by Non_standard_amino_acid_char_re.
  # Uses the same piece-count rules as split_sequence, except that a
  # single piece is returned as the peptide itself. Any other piece count
  # calls Kernel#abort, which terminates the process.
  def self.sequence_to_aaseq(sequence)
    pieces = remove_non_amino_acids(sequence).split('.')
    case pieces.size
    when 3
      pieces[1]
    when 2
      # a first piece longer than one char is taken to be the peptide
      pieces[0].size > 1 ? pieces[0] : pieces[1]
    when 1
      pieces[0]  ## no flanking residues: the piece is the peptide itself
    else
      abort "bad peptide sequence: #{sequence}"
    end
  end

  # For every string in +peptide_strings_list+ collects the entries of
  # fasta_obj.prots whose aaseq includes that string.
  # Returns a mirror array: element i is the (possibly empty) array of
  # proteins for peptide_strings_list[i].
  # It is meant to be low level and fast (eventually), so it asks for the
  # data in a format amenable to this.
  def self.protein_groups_by_sequence(peptide_strings_list, fasta_obj)
    prots = fasta_obj.prots
    # read every protein sequence once, up front, instead of once per peptide
    prot_seqs = prots.map { |prot| prot.aaseq }

    peptide_strings_list.map do |pep_seq|
      matching_prots = []
      prot_seqs.each_with_index do |prot_seq, prot_index|
        matching_prots << prots[prot_index] if prot_seq.include?(pep_seq)
      end
      matching_prots
    end
  end

  # units can be :mmu, :amu, :ppm
  #
  # NOT IMPLEMENTED: every branch below is empty, so this currently
  # returns nil for any unit.
  #
  # Original design notes:
  # 10^6 * deltam accuracy/ m[measured]
  # i.e., theoretical mass 1000, measured 999.9: 100ppm
  # http://www.waters.com/WatersDivision/ContentD.asp?watersit=EGOO-66LRQD
  # pep.mass is the theoretical M+H of the peptide
  # this assumes that the deltacn value we're being told is correct, but I
  # have my suspicions (since the <mass> value is not accurate...)
  def mass_accuracy(pep, unit=:ppm, mono=true)
    ######## TO COMPLETE (and add to spec_id..?)
    case unit
    when :ppm
    when :amu
    when :mmu
    end
  end

  # Calls the method named by each argument on self and returns the
  # results in argument order.
  def values_at(*args)
    args.map { |method_name| send(method_name) }
  end

  # One-line summary: class, probability, sequence, aaseq, charge and --
  # only when prots is set -- the number of prots.
  def inspect
    # stays nil (interpolates as an empty string) when there are no prots
    prot_count = prots ? ", @prots(#)=#{prots.size}" : nil
    "<#{self.class} @probability=#{probability}, @sequence=#{sequence}, @aaseq=#{aaseq}, @charge=#{charge}#{prot_count}>"
  end

end
|
768
|
-
|
769
|
-
# Bare concrete class whose only content is the SpecID::Prot mixin.
class SpecID::GenericProt
  include SpecID::Prot
end
|
772
|
-
|
773
|
-
# Bare concrete class whose only content is the SpecID::Pep mixin.
class SpecID::GenericPep
  include SpecID::Pep
end
|
776
|
-
|
777
|
-
|
778
|
-
|