ms-error_rate 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/.autotest ADDED
@@ -0,0 +1,14 @@
1
# -*- ruby -*-

# First :initialize hook: drop every mapping autotest installed by default.
Autotest.add_hook(:initialize) { |at| at.clear_mappings }

# Second :initialize hook: map each lib/<name>.rb file onto the spec and the
# test file that exercise it.
Autotest.add_hook(:initialize) do |at|
  at.add_mapping(%r%^lib/(.*)\.rb$%) do |_, m|
    # specs only:  ["spec/#{m[1]}_spec.rb"]
    # tests only:  ["test/#{m[1]}_test.rb"]
    # both specs and tests:
    ["spec/#{m[1]}_spec.rb", "test/#{m[1]}_test.rb"]
  end
end
data/.gitmodules ADDED
@@ -0,0 +1,9 @@
1
+ [submodule "submodule/ms-testdata"]
2
+ path = submodule/ms-testdata
3
+ url = git://github.com/bahuvrihi/ms-testdata.git
4
+ [submodule "submodule/ms-in_silico"]
5
+ path = submodule/ms-in_silico
6
+ url = git://github.com/bahuvrihi/ms-in_silico.git
7
+ [submodule "submodule/tap-mechanize"]
8
+ path = submodule/tap-mechanize
9
+ url = git://github.com/bahuvrihi/tap-mechanize.git
data/History ADDED
@@ -0,0 +1,16 @@
1
+ == 0.0.6
2
+
3
+ * changed peptide centric db output to full YAML (i.e., the protein IDs are in an inline array)
4
+
5
+ == 0.0.3
6
+
7
+ * switching to ms-template-ish structure
8
+
9
+ == 0.0.2 / 2009-10-14
10
+
11
+ * basic validation with peptide and protein centric sample bias validation.
12
+ * peptide centric database created that includes methionine cleavage.
13
+
14
+ == 0.0.1 / 2009-08-25
15
+
16
+ * initial work - borrowing basic structure from ms-sequest and using original mspire lib/validators work.
data/LICENSE CHANGED
@@ -1,6 +1,8 @@
1
1
  Copyright shared among contributing institutions:
2
2
  Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
3
  Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
4
+ Copyright (c) 2011 Brigham Young University (additions)
5
+ Authored by John T. Prince
4
6
 
5
7
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
8
  of this software and associated documentation files (the "Software"), to deal
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+
2
require 'rubygems'
require 'rake'

require 'jeweler'

# Gem packaging tasks (build, install, release, ...) generated by jeweler.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "ms-error_rate"
  gem.homepage = "http://github.com/jtprince/ms-error_rate"
  gem.license = "MIT"
  gem.summary = %Q{An mspire library for calculating error rates in MS/MS identifications (FDRs).}
  gem.description = %Q{aids for creating and calculating error rates using target-decoy searches and sample validation.}
  gem.email = "jtprince@gmail.com"
  gem.authors = ["John Prince"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
  gem.rubyforge_project = 'mspire'
  gem.add_runtime_dependency("ms-core", ">= 0.0.2")
  gem.add_runtime_dependency("ms-ident", ">= 0.0.20")
  gem.add_development_dependency "spec-more", ">= 0"
  gem.add_development_dependency "jeweler", "~> 1.5.2"
  gem.add_development_dependency "rcov", ">= 0"
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every spec/**/*_spec.rb file with lib/ and spec/ on the
# load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.verbose = true
end

#require 'rcov/rcovtask'
#Rcov::RcovTask.new do |spec|
#  spec.libs << 'spec'
#  spec.pattern = 'spec/**/*_spec.rb'
#  spec.verbose = true
#end

task :default => :spec

# 'rake/rdoctask' is obsolete in current Rake releases and aborts the whole
# Rakefile when required, so RDoc's own task is tried first.  The legacy
# task is only used when 'rdoc/task' cannot be loaded (very old RDoc).
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip so a trailing newline in VERSION does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ms-error_rate #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -0,0 +1,27 @@
1
+
2
module Ms
  module ErrorRate
    module Decoy
      # Estimates precision (the fraction of target hits that are true
      # positives) from target and decoy counts.  The number of false target
      # hits is estimated as num_decoy * frit.
      #
      # num_target:: number of target hits (anything responding to to_f)
      # num_decoy::  number of decoy hits (anything responding to to_f)
      # frit::       fraction applied to the decoy count (default 1.0)
      #
      # With no target hits the result is 0.0 if any decoys were seen and
      # 1.0 otherwise.  The value is not clamped: it is negative whenever
      # num_decoy * frit exceeds num_target.
      def self.precision(num_target, num_decoy, frit=1.0)
        # floats throughout, in case fractional counts are passed in
        targets = num_target.to_f
        decoys = num_decoy.to_f
        estimated_true = targets - (decoys * frit)

        return(decoys > 0.0 ? 0.0 : 1.0) if targets == 0.0

        estimated_true / targets
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+
2
+ require 'ms/mascot/dat'
3
+ require 'ms/error_rate/qvalue'
4
+ require 'ms/error_rate/qvalue/mascot'
5
+
6
module Ms
  module ErrorRate
    module Qvalue
      module Mascot
        module Percolator

          module_function

          # Reads a percolator tab-delimited text file and returns an array
          # of Structs, one per data line.
          #
          # The Struct members are taken from the header (first) line, with
          # every '-' replaced by '_' (so "q-value" becomes :q_value).  The
          # expected header is:
          #
          #   PSMId score q-value posterior_error_prob peptide proteinIds
          #
          # score, q-value and posterior_error_prob are cast to Float; the
          # last column collects every remaining field of the line as an
          # Array.  Three additional members are appended:
          #
          # query_num:: Integer parsed from the PSMId ("...:<query>;...:<rank>")
          # rank::      Integer parsed from the PSMId
          # sequence::  the peptide without its flanking residues and dots
          #             (the text between the first and second '.')
          #
          # Returns an empty array for an empty file.  Blank lines are
          # skipped.
          def tab_txt(file)
            hits = []
            File.open(file) do |io|
              header = io.gets
              if header
                atts = header.chomp.split("\t").map {|v| v.gsub('-', '_').to_sym }
                atts.push(:query_num, :rank, :sequence)
                # anonymous Struct: a named one ("Hit") would redefine the
                # global constant Struct::Hit on every call
                struct_class = Struct.new(*atts)

                io.each do |line|
                  line = line.chomp
                  next if line.empty?
                  (query_rank, score, qvalue, perrp, peptide, *prots) = line.split("\t")
                  (query, rank) = query_rank.split(';').map {|v| v.split(':').last.to_i }

                  hits << struct_class.new(query_rank, score.to_f, qvalue.to_f, perrp.to_f, peptide, prots, query, rank, peptide.split('.')[1])
                end
              end
            end
            hits
          end

        end
      end
    end
  end
end
42
+
43
module Ms::ErrorRate::Qvalue::Mascot::Percolator

  module_function

  # Pairs each Mascot .dat file with its percolator tab text file (by
  # position) and returns an array of MascotPeptideHit Structs
  # (:filename, :query_title, :charge, :sequence, :mowse, :qvalue).
  #
  # Only hits whose [query number, hit number] has a q-value in the
  # percolator file are considered, and only the highest-mowse hit per
  # query title is kept.
  #
  # opts =
  #     :min_peptide_length => Integer  (drop best hits with a shorter sequence)
  def qvalues(datp_files, tab_txt_files, opts={})
    min_pep_len = opts[:min_peptide_length]

    # one bucket of candidate hits per query title
    candidates = Hash.new {|table, title| table[title] = [] }

    datp_files.zip(tab_txt_files) do |datp_file, tab_txt_file|
      # percolator q-values keyed by [query_num, rank]
      qvalue_lookup = {}
      Ms::ErrorRate::Qvalue::Mascot::Percolator.tab_txt( tab_txt_file ).each do |row|
        qvalue_lookup[[row.query_num, row.rank]] = row.q_value
      end

      run_name = File.basename(datp_file, '.*')
      Ms::Mascot::Dat.open(datp_file) do |dat|
        dat.each_peptide_hit(:by => :groups, :yield_nil => false, :with_query => true) do |group, query|
          group.each do |hit|
            qval = qvalue_lookup[[hit.query_num, hit.hit_num]]
            next unless qval
            record = Ms::ErrorRate::Qvalue::Mascot::MascotPeptideHit.new(run_name, query.title, query.charge, hit.sequence, hit.score, qval)
            candidates[record.query_title] << record
          end
        end
      end
    end

    selected = []
    candidates.each_value do |title_hits|
      best = title_hits.size == 1 ? title_hits.first : title_hits.sort_by(&:mowse).last
      # the best hit is only kept if it passes the filters
      next if min_pep_len && best.sequence.size < min_pep_len
      selected << best
    end
    selected
  end
end
@@ -0,0 +1,68 @@
1
+ require 'ms/error_rate/qvalue'
2
+ require 'ms/mascot/dat'
3
+
4
# Make sure the whole namespace chain exists before it is reopened below.
module Ms ; end
module Ms::ErrorRate ; end
module Ms::ErrorRate::Qvalue ; end
module Ms::ErrorRate::Qvalue::Mascot ; end
12
+
13
+
14
module Ms::ErrorRate::Qvalue::Mascot
  # members of a MascotPeptideHit, in order
  MEMBERS = [:filename, :query_title, :charge, :sequence, :mowse, :qvalue]

  MascotPeptideHit = Struct.new(*MEMBERS) do
    # serializes as a plain array of the member values rather than as a
    # Struct object
    def to_yaml(*args)
      to_a.to_yaml(*args)
    end
  end

  module_function

  # Reads the top peptide hit of every query from the target and decoy
  # Mascot .dat files, keeps the highest-mowse hit per query title within
  # each set, and returns the hits handed back by
  # Ms::ErrorRate::Qvalue.target_decoy_qvalues (sorted on mowse) as
  # MascotPeptideHit Structs
  # (:filename, :query_title, :charge, :sequence, :mowse, :qvalue) with
  # :qvalue filled in.
  #
  # opts =
  #     :min_peptide_length => Integer  (drop best hits with a shorter sequence)
  # opts is also passed through to target_decoy_qvalues.
  def qvalues(target_files, decoy_files, opts={})
    min_pep_len = opts[:min_peptide_length]

    # the same selection runs once for the target files and once for the
    # decoy files
    target_hits, decoy_hits = [target_files, decoy_files].map do |files|
      candidates = Hash.new {|table, title| table[title] = [] }
      files.each do |file|
        run_name = File.basename(file, '.*')
        Ms::Mascot::Dat.open(file) do |dat|
          dat.each_peptide_hit(:by => :top, :yield_nil => false, :with_query => true) do |hit, query|
            record = MascotPeptideHit.new(run_name, query.title, query.charge, hit.sequence, hit.score)
            candidates[record.query_title] << record
          end
        end
      end

      selected = []
      candidates.each_value do |title_hits|
        best = title_hits.size == 1 ? title_hits.first : title_hits.sort_by(&:mowse).last
        # the best hit is only kept if it passes the filters
        next if min_pep_len && best.sequence.size < min_pep_len
        selected << best
      end
      selected
    end

    Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_hits, decoy_hits, opts, &:mowse).map do |hit, qval|
      hit.qvalue = qval
      hit
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'ms/error_rate/qvalue'
2
+
3
# Make sure the enclosing namespaces exist before Pepxml is defined.
module Ms
  module ErrorRate
    module Qvalue
    end
  end
end
6
+
7
module Ms::ErrorRate::Qvalue::Pepxml
  module_function

  # Parses a target and a decoy pepxml file and returns whatever
  # Ms::ErrorRate::Qvalue.target_decoy_qvalues returns for the hits found
  # (hit and qvalue pairs).
  #
  # Every search_hit becomes a Struct with :aaseq (the 'peptide'
  # attribute), :charge (the 'assumed_charge' attribute two levels up, as
  # an Integer) and one Float member per search_score child.  The score
  # member names are taken from the first search_hit encountered.
  #
  # The caller provides a sort_by block under which the best hits sort
  # last.  Only opt[:z_together] is passed on.
  def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
    score_names = []
    names_collected = false

    rows_per_file = [target_pepxml, decoy_pepxml].map do |path|
      File.open(path) do |io|
        doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
        # we can work with namespaces, or just remove them ...
        doc.remove_namespaces!
        doc.root.xpath('//search_hit').map do |search_hit|
          aaseq = search_hit['peptide']
          charge = search_hit.parent.parent['assumed_charge'].to_i
          score_values = []
          search_hit.children.each do |child|
            next unless child.name == 'search_score'
            score_names << child['name'].to_sym unless names_collected
            score_values << child['value'].to_f
          end
          # names come from the first hit only
          names_collected = true
          [aaseq, charge] + score_values
        end
      end
    end

    hit_class = Struct.new(:aaseq, :charge, *score_names)
    t_hits, d_hits = rows_per_file.map do |rows|
      rows.map {|row| hit_class.new(*row) }
    end

    # hit and qvalue pairs
    Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
  end
end
@@ -0,0 +1,93 @@
1
+
2
+ require 'set'
3
+ require 'ms/error_rate/decoy'
4
+
5
+
6
class Array
  unless [].respond_to?(:group_by)
    # Backport for Rubies whose Array lacks group_by: returns a Hash that
    # maps each block result to the array of elements producing it.
    def group_by(&grouper)
      groups = Hash.new {|table, key| table[key] = [] }
      each {|item| groups[grouper.call(item)] << item }
      groups
    end
  end
end
15
+
16
module Ms

  module ErrorRate
    # For generating and working with q-value calculations. The q-value is
    # the global false discovery rate when accepting that particular ID. We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to
    # prefer Storey when possible] ). The main point is that we sort and
    # threshold based on a global FDR.
    module Qvalue
      module_function

      # Returns an array of [target_hit, qvalue] pairs.
      #
      # target_hits:: array of target hit objects
      # decoy_hits::  array of decoy hit objects
      # opts::        :z_together (default false), see below; all options
      #               are also passed through to mixed_target_decoy
      # sorting::     block returning the sort key of a hit; it must sort
      #               from worst to best.  Defaults to {|hit| hit.score }.
      #
      # When opts[:z_together] is false or nil, all hits are ranked in one
      # pool and the pairs come back best to worst.  When it is true, hits
      # are grouped by hit.charge, each group is ranked on its own and the
      # per-group results are concatenated.
      # NOTE(review): the option name (and the previous comment) suggest the
      # opposite meaning; behavior is left as is for existing callers.
      #
      # A hit counts as a target only if it is one of the very objects in
      # target_hits (identity, not ==), so a decoy Struct that happens to
      # hold the same field values as a target is still a decoy.
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new.compare_by_identity.merge(target_hits)

        # Proc.new doesn't do arity checking
        hit_with_qvalue_pairs = Proc.new do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          # locals deliberately do not reuse the name target_hits: that
          # would overwrite the method argument from inside the closure
          (ranked_targets, qvalues) = Ms::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          ranked_targets.zip(qvalues)
        end

        all_together = target_hits + decoy_hits
        if !opts[:z_together]
          hit_with_qvalue_pairs.call(all_together)
        else
          all_pairs = []
          all_together.group_by(&:charge).each do |_charge, hits|
            all_pairs.concat(hit_with_qvalue_pairs.call(hits))
          end
          all_pairs
        end
      end

      # Returns [target_hits, qvalues] (parallel arrays sorted from best hit
      # to worst hit).
      #
      # best_to_worst::  array-like object of hits sorted from best to
      #                  worst, with decoys interspersed
      # target_setlike:: object responding to include?(hit); a hit is
      #                  assumed to be a decoy if it is not found there!
      # opts::           :monotonic (default true).  When true, each qvalue
      #                  is lowered to the smallest qvalue found at that
      #                  position or any worse one, so qvalues never
      #                  decrease from best to worst hit.  When false the
      #                  raw values (1.0 - precision) are returned.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        monotonic = opts[:monotonic]
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            precision = Ms::ErrorRate::Decoy.precision(num_target, num_decoy)
            target_hits << hit
            qvalues << (1.0 - precision)
          else
            num_decoy += 1
          end
        end
        if monotonic
          min_qvalue = qvalues.last
          qvalues = qvalues.reverse.map do |val| # from worst to best score
            if min_qvalue < val
              min_qvalue
            else
              min_qvalue = val
              val
            end
          end.reverse
        end
        [target_hits, qvalues]
      end


    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'ms/error_rate/sbv'
2
+
3
module Ms
  module ErrorRate
    class Sbv
      # Indicator values based on the peptide's own amino acid sequence.
      class PeptideBased

        # Delegates to Ms::ErrorRate::Sbv.generate_hashes with the type
        # code "aa_min<min_num>" and scores each peptide 1 or 0.
        #
        # pep_to_prot_file:: passed through unchanged
        # aa::               residue to look for (default "C")
        # min_num::          minimum number of occurrences (default 1)
        #
        # With min_num == 1 a peptide scores 1 if it includes aa; otherwise
        # it scores 1 if at least min_num of its characters equal aa.
        def self.generate_hashes(pep_to_prot_file, aa="C", min_num=1 )
          type_code = "aa_min#{min_num}"
          Ms::ErrorRate::Sbv.generate_hashes(pep_to_prot_file, :type_code => type_code) do |pep|
            meets_minimum =
              if min_num == 1
                pep.include?(aa)
              else
                tally = 0
                pep.each_char {|residue| tally += 1 if residue == aa }
                tally >= min_num
              end
            meets_minimum ? 1 : 0
          end
        end

      end # class
    end # Sbv
  end # ER
end # Ms
30
+
@@ -0,0 +1,39 @@
1
+ require 'ms/fasta'
2
+ require 'ms/error_rate/sbv'
3
+ require 'transmembrane'
4
+
5
module Ms
  module ErrorRate
    class Sbv
      # Indicator values derived from per-protein values.
      module ProteinBased
        # value used for a peptide when none of its proteins has a known value
        DEFAULT_NO_PROTS_VAL = 0.0

        # Delegates to Ms::ErrorRate::Sbv.generate_hashes, scoring each
        # peptide with the mean of the non-nil values of its proteins
        # (DEFAULT_NO_PROTS_VAL when there are none).
        #
        # pep_to_prot_file:: peptide to protein file; note the proteins of
        #                    a peptide are in a string separated by hyphens
        # protid_to_val::    hash of protein id => numeric value (or nil)
        # options::          passed on to Sbv.generate_hashes; :type_code
        #                    defaults to 'tm'.  The caller's hash is not
        #                    modified.
        #
        # Returns whatever Sbv.generate_hashes returns (the names of the
        # files written).
        def self.generate_hashes(pep_to_prot_file, protid_to_val, options={})
          # work on a copy so the caller's hash (possibly frozen or reused)
          # is left untouched
          opts = options.merge(:protein_hash => protid_to_val)
          opts[:type_code] ||= 'tm'

          Ms::ErrorRate::Sbv.generate_hashes(pep_to_prot_file, opts) do |prot_return_vals|
            known = prot_return_vals.compact
            if known.empty?
              DEFAULT_NO_PROTS_VAL
            else
              known.inject(0) {|sum, val| sum + val }.to_f / known.size
            end
          end
        end # end method
      end # module
    end # class
  end # ErrorRate
end # Ms
+
@@ -0,0 +1,111 @@
1
+
2
+
3
require 'yaml' # generate_hashes serializes with to_yaml

module Ms
  module ErrorRate
    # Sample Bias Validator
    class Sbv
      LENGTH_EXT = 'freq_by_length'
      AASEQ_EXT = 'by_aaseq'

      # Reads a "peptide: proteins" file (one peptide per line, the proteins
      # joined by '-') and writes two files next to it:
      #
      # 1. <base>.<type_code>.<aaseq_ext><file_ext>: one "peptide: value"
      #    line per peptide, where value is what the block returned
      # 2. <base>.<type_code>.<length_ext><file_ext>: a YAML hash of
      #    peptide length => mean block value for peptides of that length
      #
      # If opts[:protein_hash] is given, the block is yielded an array with
      # the hash's value for each protein of the peptide (nil when missing).
      # Otherwise the block is yielded each peptide in turn.  The block must
      # return a number.
      #
      # opts (with defaults):
      #     :aaseq_ext      => AASEQ_EXT
      #     :length_ext     => LENGTH_EXT
      #     :file_ext       => '.yml'
      #     :type_code      => ''
      #     :protein_hash   => nil
      #     :stderr_counter => true   (progress dots on $stderr)
      #
      # Returns the two file names written, [by_aaseq_file, by_length_file].
      def self.generate_hashes(pep_to_prot_file, opts={})
        op = { :aaseq_ext => AASEQ_EXT,
          :length_ext => LENGTH_EXT,
          :file_ext => '.yml',
          :type_code => '',
          :protein_hash => nil,
          :stderr_counter => true,
        }.merge(opts)

        base = pep_to_prot_file.chomp(File.extname(pep_to_prot_file))
        freqs = Hash.new {|h,k| h[k] = 0.0 }
        counts = Hash.new {|h,k| h[k] = 0 }
        (fileout1, fileout2) = [:aaseq_ext, :length_ext].map do |type_ext|
          base + '.' + op[:type_code] + '.' + op[type_ext] + op[:file_ext]
        end
        protein_hash = op[:protein_hash]
        pep_count = 0
        if op[:stderr_counter]
          $stderr.print "[working, 100,000 peptides = '.'] "
          $stderr.flush
        end
        File.open(fileout1 , 'w') do |out|
          File.foreach(pep_to_prot_file) do |line|
            # chomp, not chomp!: chomp! returns nil when the last line has
            # no trailing newline
            (pep, prot_string) = line.chomp.split(': ')

            answ =
              if protein_hash
                yield( protein_hash.values_at(*(prot_string.split('-'))) )
              else
                yield(pep)
              end
            out.puts "#{pep}: #{answ}"
            freqs[pep.size] += answ
            counts[pep.size] += 1
            pep_count += 1
            if pep_count % 100000 == 0 && op[:stderr_counter]
              $stderr.print '.'
              $stderr.flush
            end
          end
        end
        $stderr.puts "DONE!" if op[:stderr_counter]
        avg_freq_by_length = {}
        freqs.each do |length, total|
          avg_freq_by_length[length] = total / counts[length]
        end
        File.open(fileout2, 'w') {|out| out.print avg_freq_by_length.to_yaml }
        [fileout1, fileout2]
      end


      # a hash by aaseq giving a value between 0 and 1 telling how much of
      # an indicator the hit is
      attr_accessor :indicator_by_aaseq

      attr_accessor :frequency_indicator_opposite

      # a hash by peptide length giving the indicator frequency for that
      # length
      attr_accessor :size_to_freq

      # boolean
      attr_accessor :indicators_signify_true_hit


      # indicator_by_aaseq_hash::      see indicator_by_aaseq
      # size_to_freq::                 see size_to_freq
      # frequency_indicator_opposite:: number used as
      #                                (1.0 - frequency_indicator_opposite)
      #                                in update_precision
      # indicators_signify_true_hit::  true if an indicator marks a true
      #                                hit, false if it marks a false hit
      def initialize(indicator_by_aaseq_hash, size_to_freq, frequency_indicator_opposite, indicators_signify_true_hit=true)
        @indicators_signify_true_hit = indicators_signify_true_hit
        @frequency_indicator_opposite = frequency_indicator_opposite
        @indicator_by_aaseq = indicator_by_aaseq_hash
        @size_to_freq = size_to_freq
        # running totals updated by update_precision
        @tot_num_indicators = 0.0
        @tot_num = 0
        @frequency_of_indicators_sum = 0.0
      end

      # Adds the hit with the given aaseq to the running totals and returns
      # the cumulative precision (fraction of true positives among total
      # hits).  The frequency of indicators (the probability that a generic
      # amino acid sequence will be an indicator) is looked up by sequence
      # length in size_to_freq.
      #
      # aaseq must be a key of indicator_by_aaseq and its size a key of
      # size_to_freq.
      def update_precision(aaseq)
        @tot_num_indicators += indicator_by_aaseq[aaseq]
        @tot_num += 1
        @frequency_of_indicators_sum += @size_to_freq[aaseq.size]
        # FP Indicator
        value = @tot_num_indicators * (1.0 - @frequency_indicator_opposite) * @frequency_of_indicators_sum / (@tot_num**2)
        if @indicators_signify_true_hit
          value  # a true indicator type (gives precision)
        else # false indicator type
          1 - value  # 1 - fdr == precision
        end
      end

      # NOTE(review): @aaseq_to_fraction is never assigned in this class, so
      # this currently returns nil; looks unfinished.
      def calculate_background_frequency
        @aaseq_to_fraction
      end

    end

  end
end
@@ -0,0 +1,9 @@
1
+
2
+ require 'ms/error_rate/sbv'
3
+ require 'ms/error_rate/decoy'
4
+
5
# Top-level namespace for the error rate library.
module Ms ; end
module Ms::ErrorRate ; end
9
+