mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/transmem.rb
DELETED
@@ -1,157 +0,0 @@
|
|
1
|
-
|
2
|
-
# A TransmemIndex is a hash that takes a fasta reference as key and returns
# a structured hash containing the transmembrane information.
#
# The instance methods below call +each+, +key?+ and +[]+ on +self+, so the
# module is meant to be mixed into (or extended onto) a Hash-like object.
module TransmemIndex

  # Scans +file+ line by line for a known marker.
  # returns :toppred or :phobius (nil if no marker line is found)
  def self.filetype(file)
    File.open(file) do |fh|
      fh.each_line do |line|
        case line
        # NOTE(review): 'SEQENCE' is matched verbatim; presumably it mirrors
        # the header text found in the input files -- confirm before
        # "correcting" the spelling.
        # The second pattern picks up header-less files that contain a single
        # prot without tm or signal peptide.
        when /SEQENCE/, / 0 0 i/
          return :phobius
        # "New text" format marker, then the XML format marker.
        when /Algorithm specific parameters/, /<parameters>/
          return :toppred
        end
      end
    end
    nil
  end

  # Placeholder: needs to be subclassed or written. Returns nil as written.
  def reference_to_key(reference)
    # needs to be subclassed or written
  end

  # Builds the index object that matches the detected file type.
  # right now accepts toppred.out files
  # Phobius objects can use the fasta object to update their hash for methods
  # like avg_overlap
  #
  # Raises ArgumentError when the file type is not recognized.
  def self.new(file, fasta=nil)
    case x = filetype(file)
    when :toppred
      # required lazily so only the parser that is needed gets loaded
      require 'transmem/toppred'
      TopPred::Index.new(file)
    when :phobius
      require 'transmem/phobius'
      # warn "WARNING: You have NO fasta object with Phobius based TransmemIndex! (which needs one to do proper indexing!)" unless fasta
      Phobius::Index.new(file, fasta)
    else
      raise ArgumentError, "#{x} filetype for #{file} not recognized!"
    end
  end

  # returns a hash of key -> num certain transmembrane segments
  # (0 when the entry has no :num_certain_transmembrane_segments value)
  def num_certain_index
    hash = {}
    self.each do |k, v|
      hash[k] = v[:num_certain_transmembrane_segments] || 0
    end
    hash
  end

  # tp = :number or :fraction which is the fraction of the sequence size
  # returns the average number of overlapping amino acids with transmembrane
  # segments (averaged over every occurrence of +sequence+ in the protein)
  # returns 0.0 if there are no occurrences or no transmembrane segments
  # returns nil if there is no protein by that key
  def avg_overlap(key, sequence, tp=:number)
    # what to do if the protein isn't there?? which happens on occasion
    return nil unless self.key?(key)

    numbers = num_transmem_aa(self[key], sequence)
    return 0.0 if numbers.size == 0

    avg_num = numbers.inject(0) { |sum, num| sum + num }.to_f / numbers.size
    if tp == :fraction
      avg_num / sequence.size
    else
      avg_num
    end
  end

  # returns an array (usually length of 1) of the number of amino acids
  # contained inside transmembrane spanning segments.
  # reads tmhash[:transmembrane_segments] (each with 1-indexed :start and
  # :stop) and tmhash[:aaseq]
  # if there are no transmembrane segments, returns empty array.
  def num_transmem_aa(tmhash, sequence)
    return [] unless tmhash.key?(:transmembrane_segments)

    # convert the 1-indexed start/stop into 0-indexed inclusive ranges
    ranges = tmhash[:transmembrane_segments].map do |tmseg|
      Range.new(tmseg[:start] - 1, tmseg[:stop] - 1)
    end
    num_overlapping_chars(tmhash[:aaseq], ranges, sequence)
  end

  # returns an array with one entry per (non-overlapping) occurrence of
  # substring within full_sequence; each entry is the number of characters of
  # that occurrence that fall inside any of the given ranges.
  # ranges should be 0 indexed!!!
  # the span includes the 'stop' position i.e., full_sequence[start..stop]
  # returns [] if there are no ranges or substring is empty.
  def num_overlapping_chars(full_sequence, ranges, substring)
    return [] if ranges.size == 0

    slen = substring.size
    # index("", pos) always succeeds at pos, so the scan below would never
    # advance on an empty substring
    return [] if slen == 0

    substring_ranges = []
    pos = 0
    while (i = full_sequence.index(substring, pos))
      substring_ranges << Range.new(i, i + slen - 1)
      pos = i + slen
    end

    substring_ranges.map do |sb|
      overlap = 0
      ranges.each do |tm|
        # inclusive interval intersection; this also counts the case where
        # the occurrence completely spans the transmembrane segment
        shared = [sb.last, tm.last].min - [sb.first, tm.first].max + 1
        overlap += shared if shared > 0
      end
      overlap
    end
  end

end
|
151
|
-
|
152
|
-
|
153
|
-
#substring_ranges = full_sequence.enum_for(:scan, substring).map do
|
154
|
-
# (ofirst, olast) = $~.offset(0)
|
155
|
-
# Range.new(ofirst, olast - 1)
|
156
|
-
# end
|
157
|
-
|
data/lib/validator/aa.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'validator/digestion_based'
|
2
|
-
require 'fasta'
|
3
|
-
require 'spec_id/aa_freqs'
|
4
|
-
|
5
|
-
# Constraints on aaseq attribute of peptides (the bare amino acid sequence)
# works by calculating amino acid frequencies in the fasta file used.
class Validator::AA < Validator::DigestionBased
  include Precision::Calculator

  # the amino acid being looked for (stored as a String)
  attr_accessor :constraint

  # it is a false hit if the amino acid is located in the peptide
  attr_accessor :false_if_found

  DEFAULTS = Validator::DigestionBased::DEFAULTS.merge(:false_if_found => true)

  # right now only accepts single amino acids as constraints (as a string,
  # e.g. 'C', or symbol, e.g. :C)
  # options:
  #   :false_to_total_ratio => if a true digestion was already performed (see
  #                            Validator::AA.calc_false_to_total_ratio)
  #   :false_if_found => it is a false positive if the amino acid is found.
  #   :background => the background level of amino acid Float
  def initialize(constraint, options={})
    settings = DEFAULTS.merge(options)
    @constraint = constraint.to_s
    @false_to_total_ratio = settings[:false_to_total_ratio]
    @false_if_found = settings[:false_if_found]
    @background = settings[:background]
  end

  # Splits peps on whether pep.aaseq includes the constraint.
  # returns tp, fp
  def partition(peps)
    with_aa, without_aa = peps.partition { |pep| pep.aaseq.include?(@constraint) }
    @false_if_found ? [without_aa, with_aa] : [with_aa, without_aa]
  end

  # One-line summary of this validator's settings (bkg shows 0.0 when no
  # background has been set).
  def to_param_string
    fields = ["{constraint=#{@constraint}", "false_to_total_ratio=#{@false_to_total_ratio}", "bkg=#{@background || 0.0}}"]
    "aminoacid(bad_aa)=" + fields.join(", ")
  end
end
|
48
|
-
|
data/lib/validator/aa_est.rb
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'validator/aa'
|
2
|
-
|
3
|
-
|
4
|
-
class Validator ; end
|
5
|
-
class Validator::AA ; end
|
6
|
-
|
7
|
-
# A class that uses the peps given to it and a background frequency to
# calculate the false_to_total_ratio at each turn.
class Validator::AAEst < Validator::AA
  attr_accessor :constraint
  attr_accessor :false_if_found

  # the frequency of the amino acid is used to estimate the false to
  # total ratio based on the pephits given for pephit_precision.
  # see Validator::AA.calc_frequency to calculate a frequency
  # or use set_frequency to set from pep hits.
  attr_accessor :frequency

  DEFAULTS = {
    :false_if_found => true
  }.merge(Validator::DigestionBased::DEFAULTS) # background 0.0

  # only takes a string right now for constraint
  # options read: :frequency, :false_if_found, :background
  def initialize(constraint, options={})
    @constraint = constraint.to_s
    opts = DEFAULTS.merge(options)
    (@frequency, @false_if_found, @background) = opts.values_at(:frequency, :false_if_found, :background)
  end

  # Recomputes the false_to_total_ratio from peps, then defers to the
  # inherited pephit_precision.
  def pephit_precision(peps)
    set_false_to_total_ratio(peps)
    super(peps)
  end

  # Sets @false_to_total_ratio to the mean, over peps, of
  # 1 - (1 - frequency)**(peptide length); 1.0 when peps is empty.
  def set_false_to_total_ratio(peps)
    if peps.size > 0
      expected = 0.0
      peps.each do |pep|
        expected += (1.0 - ((1.0 - @frequency)**pep.aaseq.size))
      end
      @false_to_total_ratio = expected / peps.size
    else
      @false_to_total_ratio = 1.0
    end
  end

  # Running-tally variant: adds each pep's expectation to @expected and
  # divides by the total number of hits submitted so far.
  # NOTE(review): an empty batch sets the ratio to 1.0 even when earlier
  # batches were submitted -- confirm that this is intended.
  def set_ongoing_false_to_total_ratio(peps)
    if peps.size > 0
      peps.each do |pep|
        @expected += (1.0 - ((1.0-@frequency)**pep.aaseq.size))
      end
      # @increment_total_submitted should == @increment_tps and @increment_fps
      # since these are either/or
      @false_to_total_ratio = @expected / @increment_total_submitted
    else
      @false_to_total_ratio = 1.0
    end
  end

  def to_param_string
    "aminoacid(bad_aa)=" + ["{constraint=#{@constraint}", "frequency=#{@frequency}", "bkg=#{(@background ? @background : 0.0) }}"].join(", ")
  end

  # takes objects responding to aaseq and sets the frequency based on
  # constraint.  constraint is one acceptable to initialize!  returns self
  def set_frequency(objs)
    table = SpecID::AAFreqs.new.calculate_frequencies(objs)
    @frequency = table[@constraint.to_sym]
    self
  end

  # if adding pephits in groups at a time, the entire group does not need to be
  # queried, just the individual hit.  Use this OR pephits_precision (NOT
  # both).  The initial query to this method will begin a running tally that
  # is saved by the validator.
  # takes either an array or a single pephit (determined by if it is a
  # SpecID::Pep)
  def increment_pephits_precision(peps)
    # Warnings are switched off around the first-use check (presumably to
    # silence the uninitialized instance variable warning -- TODO confirm).
    # The ensure guarantees the global is restored even if
    # initialize_increment raises.
    saved_verbose = $VERBOSE
    $VERBOSE = nil
    begin
      unless @increment_initialized
        initialize_increment
        @expected = 0.0
      end
    ensure
      $VERBOSE = saved_verbose
    end

    to_submit =
      if peps.is_a? SpecID::Pep
        [peps]
      else
        peps
      end
    @increment_total_submitted += to_submit.size
    (tps, fps) = partition(to_submit)
    #### THIS IS THE MAGIC FOR THIS VALIDATOR:
    set_ongoing_false_to_total_ratio(to_submit)

    @increment_tps += tps.size
    @increment_fps += fps.size
    (num_tps, num_fps) =
      if self.respond_to?(:calc_precision_prep) # for digestion based validators
        calc_precision_prep(@increment_tps, @increment_fps)
      else
        [@increment_tps, @increment_fps]
      end
    calc_precision(num_tps, num_fps)
  end

end
|
data/lib/validator/background.rb
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
require 'validator'
|
2
|
-
require 'vec'
|
3
|
-
require 'enumerator'
|
4
|
-
|
5
|
-
class Validator ; end

# Estimates a background level from a series of values held in +data+.
class Validator::Background

  # the raw values the estimators work on
  attr_accessor :data

  def initialize(data=nil)
    @data = data
  end

  # Replaces every NaN entry of vec with 0, in place (despite the name,
  # nothing is removed).
  def delete_nan!(vec)
    vec.each_with_index do |v, i|
      vec[i] = 0 if v.nan?
    end
  end

  # Windowed (stdev_factor * stdev + spread) of the data, smoothed by a moving
  # average; the last point where the derivative of that curve is 0 is
  # located and the minimum data value in a window around it is returned.
  # NOTE(review): stdev_points is accepted but not used; both windows are
  # hard-coded to 9.  Left as is because the default (15) differs from 9.
  # NOTE(review): raises if no derivative is exactly 0 (index_of_last_0
  # returns nil in that case).
  def stdev_plus_spread(stdev_factor=2.0, stdev_points=15, min_window_pre=5, min_window_post=5)
    data_vec = VecD[*@data]
    delete_nan!(data_vec)
    stdev_transform = data_vec.transform(9) {|vec| (stdev_factor * vec.sample_stats[1]) + vec.spread }
    smoothed_stdev = stdev_transform.transform(9) {|vec| vec.avg }
    smoothed_stdev_derivs = smoothed_stdev.chim
    last_0_index = index_of_last_0(smoothed_stdev_derivs)
    min_in_window(data_vec, last_0_index, min_window_pre, min_window_post)
  end

  # Shells out to an external `graph` command with the values of vec.
  def plot(vec)
    `graph #{vec.join(" ")} -a -T X`
  end

  # not really working right currently
  def derivs(avg_points=15, min_window_pre=5, min_window_post=5)
    data_vec = VecD[*@data]
    delete_nan!(data_vec)
    drvs = data_vec.chim
    # absolute value
    drvs.each_with_index {|x,i| drvs[i] = x.abs }
    mv_avg = drvs.transform(avg_points) {|v| v.avg }
    last_0_index = index_of_last_0(mv_avg.chim)
    min_in_window(data_vec, last_0_index, min_window_pre, min_window_post)
  end

  # returns the index of the last element equal to 0 (nil if there is none)
  def index_of_last_0(vec)
    last_0_index = nil
    vec.each_with_index do |v, i|
      last_0_index = i if v == 0
    end
    last_0_index
  end

  # returns the minimum value in the window running from (index - pre) to
  # (index + post), clamped to the bounds of vec
  def min_in_window(vec, index, pre, post)
    last_index = vec.size - 1
    start = index - pre
    stop = index + post
    start = 0 if start < 0
    stop = last_index if stop > last_index
    vec[start..stop].min
  end

  # very simple, should work
  # Smooths the data with a moving average over +points+ values and returns
  # the minimum of the smoothed values in start..stop.
  def min_mesa(start, stop, points=3)
    data_vec = VecD[*@data]
    delete_nan!(data_vec)
    # the window used to be hard-coded to 3, which ignored the points argument
    smoothed = data_vec.transform(points) {|v| v.avg }
    smoothed[start..stop].min
  end

end
|
77
|
-
|
data/lib/validator/bias.rb
DELETED
@@ -1,95 +0,0 @@
|
|
1
|
-
require 'validator'
|
2
|
-
require 'validator/digestion_based'
|
3
|
-
|
4
|
-
# class for any generic kind of bias.  For instance, a list of high abundance
# proteins we would expect to see, or a list of low abundance proteins we
# would not expect to see, or proteins that have been filtered out in some
# way, etc.
class Validator::Bias < Validator::DigestionBased
  include Precision::Calculator

  # a fasta object (by default containing proteins expected to be in the
  # sample [see proteins_expected to modify that behavior])
  attr_reader :fasta

  # correct_wins means that only a single protein from a pep.aaseq must match
  # the fasta object for the pep hit to be considered valid.  Otherwise, all
  # must be a match (logic negated by proteins_expected)
  attr_accessor :correct_wins

  # proteins_expected==true means we expect to see the proteins in the sample
  # proteins_expected==false means we do not expect to see these proteins in
  # the sample
  attr_accessor :proteins_expected

  # a hash made by taking each fasta reference in fasta_object, (everything
  # until a space) and setting the value to true.  It can be queried with the
  # start of an fasta sequence
  attr_accessor :short_reference_hash

  DEFAULTS = Validator::DigestionBased::DEFAULTS.merge(
    :proteins_expected => true,
    :correct_wins => true
  )

  # Builds the lookup hash: prot.first_entry => true for every protein
  # yielded by fasta_object.each.
  def self.make_short_reference_hash(fasta_object)
    lookup = {}
    fasta_object.each { |prot| lookup[prot.first_entry] = true }
    lookup
  end

  # options:
  #   (t = true, f = false, '*'= default)
  #   :proteins_expected => *t/f  we expect to see the fasta proteins in our hit list
  #   :correct_wins => *t/f  a single peptide hit from one of these proteins
  #                          constitutes a true positive
  #   :background => Float (*0.0-1.0)
  #   :false_to_total_ratio => Float (*nil by default)
  def initialize(fasta_object, options={})
    settings = DEFAULTS.merge(options)
    @proteins_expected = settings[:proteins_expected]
    @correct_wins = settings[:correct_wins]
    @background = settings[:background]
    @false_to_total_ratio = settings[:false_to_total_ratio]
    @fasta = fasta_object
    # despite the name this holds an Array of the protein references
    @header_split_hash = @fasta.prots.map { |prot| prot.reference }
    @short_reference_hash = self.class.make_short_reference_hash(fasta_object)
  end

  # Splits peps into [tp, fp] according to whether their proteins appear in
  # short_reference_hash, honoring correct_wins and proteins_expected.
  def partition(peps)
    # when proteins are NOT expected the sense of correct_wins is negated
    match_listed = @proteins_expected ? @correct_wins : !@correct_wins

    first, second = peps.partition do |pep|
      pep.prots.any? do |pepprot|
        listed = @short_reference_hash.key?(pepprot.first_entry)
        match_listed ? listed : !listed
      end
    end

    # without correct_wins every protein must agree, which is the complement
    # of the "any" test above -- hence the swap
    @correct_wins ? [first, second] : [second, first]
  end

  # pephit_precision is done through inheritance

  # One-line summary of this validator's settings.
  def to_param_string
    fields = ["{fasta=#{@fasta.filename}", "proteins_expected=#{@proteins_expected}", "correct_wins=#{@correct_wins}", "background=#{@background}}"]
    "abundance=" + fields.join(", ")
  end

end
|