ms-error_rate 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.autotest ADDED
@@ -0,0 +1,14 @@
1
+ # -*- ruby -*-
2
+
3
# First hook: call clear_mappings so only the mapping added below is in effect.
Autotest.add_hook(:initialize) { |autotest| autotest.clear_mappings }

# Second hook: associate every lib/<path>.rb file with both its spec file and
# its test file.
Autotest.add_hook(:initialize) do |autotest|
  autotest.add_mapping(%r{^lib/(.*)\.rb$}) do |_filename, match|
    # spec only would be:  ["spec/#{match[1]}_spec.rb"]
    # tests only would be: ["test/#{match[1]}_test.rb"]
    ["spec/#{match[1]}_spec.rb", "test/#{match[1]}_test.rb"]
  end
end
data/.gitmodules ADDED
@@ -0,0 +1,9 @@
1
+ [submodule "submodule/ms-testdata"]
2
+ path = submodule/ms-testdata
3
+ url = git://github.com/bahuvrihi/ms-testdata.git
4
+ [submodule "submodule/ms-in_silico"]
5
+ path = submodule/ms-in_silico
6
+ url = git://github.com/bahuvrihi/ms-in_silico.git
7
+ [submodule "submodule/tap-mechanize"]
8
+ path = submodule/tap-mechanize
9
+ url = git://github.com/bahuvrihi/tap-mechanize.git
data/History ADDED
@@ -0,0 +1,16 @@
1
+ == 0.0.6
2
+
3
+ * changed peptide centric db output to full YAML (i.e., the protein IDs are in an inline array)
4
+
5
+ == 0.0.3
6
+
7
+ * switching to ms-template-ish structure
8
+
9
+ == 0.0.2 / 2009-10-14
10
+
11
+ * basic validation with peptide and protein centric sample bias validation.
12
+ * peptide centric database created that includes methionine cleavage.
13
+
14
+ == 0.0.1 / 2009-08-25
15
+
16
+ * initial work - borrowing basic structure from ms-sequest and using original mspire lib/validators work.
data/LICENSE CHANGED
@@ -1,6 +1,8 @@
1
1
  Copyright shared among contributing institutions:
2
2
  Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
3
  Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
4
+ Copyright (c) 2011 Brigham Young University (additions)
5
+ Authored by John T. Prince
4
6
 
5
7
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
8
  of this software and associated documentation files (the "Software"), to deal
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+
2
+ require 'rubygems'
3
+ require 'rake'
4
+
5
require 'jeweler'

# Gem packaging/release tasks. The block receives a Gem::Specification
# (see http://docs.rubygems.org/read/chapter/20 for more options).
Jeweler::Tasks.new do |gemspec|
  gemspec.name = "ms-error_rate"
  gemspec.homepage = "http://github.com/jtprince/ms-error_rate"
  gemspec.license = "MIT"
  gemspec.summary = "An mspire library for calculating error rates in MS/MS identifications (FDRs)."
  gemspec.description = "aids for creating and calculating error rates using target-decoy searches and sample validation."
  gemspec.email = "jtprince@gmail.com"
  gemspec.authors = ["John Prince"]
  gemspec.rubyforge_project = 'mspire'

  # Runtime dependencies are required when using the gem; development
  # dependencies are only needed for rake tasks, tests, etc.
  #   gemspec.add_runtime_dependency 'jabber4r', '> 0.1'
  #   gemspec.add_development_dependency 'rspec', '> 1.2.3'
  gemspec.add_runtime_dependency("ms-core", ">= 0.0.2")
  gemspec.add_runtime_dependency("ms-ident", ">= 0.0.20")
  gemspec.add_development_dependency("spec-more", ">= 0")
  gemspec.add_development_dependency("jeweler", "~> 1.5.2")
  gemspec.add_development_dependency("rcov", ">= 0")
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'

# `rake spec` runs every *_spec.rb file under spec/ with lib/ and spec/ on
# the load path.
Rake::TestTask.new(:spec) do |t|
  t.libs.push('lib', 'spec')
  t.pattern = 'spec/**/*_spec.rb'
  t.verbose = true
end

#require 'rcov/rcovtask'
#Rcov::RcovTask.new do |spec|
#  spec.libs << 'spec'
#  spec.pattern = 'spec/**/*_spec.rb'
#  spec.verbose = true
#end

# Running bare `rake` runs the specs.
task :default => :spec
44
# 'rake/rdoctask' was deprecated and later removed from Rake; the task now
# lives in RDoc itself as RDoc::Task. Fall back to the old location only when
# the new one is unavailable (very old toolchains).
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

# Defines the rdoc tasks; output goes to ./rdoc.
rdoc_task_class.new do |rdoc|
  # strip the trailing newline so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ms-error_rate #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -0,0 +1,27 @@
1
+
2
module Ms
  module ErrorRate
    module Decoy
      # Estimated precision of a set of target hits, where the number of
      # false hits is estimated from the number of decoy hits.
      #
      # num_target:: number of target hits
      # num_decoy::  number of decoy hits
      # frit::       fraction by which the decoy count is scaled (default 1.0)
      #
      # All arithmetic is done on floats so fractional counts are accepted.
      # With zero target hits the result is 0.0 if any decoys were seen and
      # 1.0 otherwise.
      def self.precision(num_target, num_decoy, frit=1.0)
        targets = num_target.to_f
        decoys = num_decoy.to_f
        estimated_true = targets - (decoys * frit)

        return(decoys > 0.0 ? 0.0 : 1.0) if targets == 0.0

        estimated_true / targets
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+
2
+ require 'ms/mascot/dat'
3
+ require 'ms/error_rate/qvalue'
4
+ require 'ms/error_rate/qvalue/mascot'
5
+
6
module Ms
  module ErrorRate
    module Qvalue
      module Mascot
        module Percolator

          module_function

          # Reads a tab-delimited Percolator text output file and returns an
          # array of Structs, one per data line.
          #
          # The struct members are taken from the header line (with '_'
          # substituted for '-', so "q-value" becomes :q_value), followed by
          # three additional members: :query_num, :rank and :sequence.
          # The expected header is:
          #
          #   PSMId score q-value posterior_error_prob peptide proteinIds
          #
          # score, q-value and posterior_error_prob are cast to floats, all
          # remaining columns are collected into the proteinIds array,
          # query_num and rank are parsed as integers out of the PSMId, and
          # sequence is the middle part of the peptide string (the part
          # between the first and second '.').
          #
          # Returns an empty array for an empty file; blank lines are skipped.
          def tab_txt(file)
            hits = []
            File.open(file) do |io|
              header = io.gets
              return hits if header.nil? # empty file: nothing to parse

              atts = header.chomp.split("\t").map {|v| v.gsub('-', '_').to_sym }
              atts.push(:query_num, :rank, :sequence)
              # anonymous Struct: naming it ("Hit") would redefine the global
              # constant Struct::Hit (with a warning) on every call
              struct_class = Struct.new(*atts)

              io.each do |line|
                line = line.chomp
                next if line.strip.empty?
                (query_rank, score, qvalue, perrp, peptide, *prots) = line.split("\t")
                (query, rank) = query_rank.split(';').map {|v| v.split(':').last.to_i }

                hits << struct_class.new(query_rank, score.to_f, qvalue.to_f, perrp.to_f, peptide, prots, query, rank, peptide.split('.')[1])
              end
            end
            hits
          end

          # Returns an array of MascotPeptideHit structs
          # (:filename, :query_title, :charge, :sequence, :mowse, :qvalue),
          # one per query title: the hit with the highest mowse score.
          #
          # datp_files and tab_txt_files are parallel arrays: each Mascot dat
          # file is paired with the Percolator tab text file holding its
          # q-values. Only hits whose [query_num, rank] has a q-value in the
          # Percolator file are considered.
          #
          # opts =
          #     :min_peptide_length => Integer  (best hits with a shorter
          #                                      sequence are dropped)
          def qvalues(datp_files, tab_txt_files, opts={})
            min_pep_len = opts[:min_peptide_length]

            # we only want the top hit per query title (which should ensure
            # that we get the top hit per scan)
            hits_by_query_title = Hash.new {|h,k| h[k] = [] }
            datp_files.zip(tab_txt_files) do |datp_file, tab_txt_file|
              # q-value lookup keyed on [query number, rank]
              qvalue_by_query_rank = {}
              Ms::ErrorRate::Qvalue::Mascot::Percolator.tab_txt(tab_txt_file).each do |struct|
                qvalue_by_query_rank[[struct.query_num, struct.rank]] = struct.q_value
              end

              base_no_ext = File.basename(datp_file, '.*')
              Ms::Mascot::Dat.open(datp_file) do |dat|
                dat.each_peptide_hit(:by => :groups, :yield_nil => false, :with_query => true) do |hits,query|
                  hits.each do |hit|
                    if qval = qvalue_by_query_rank[[hit.query_num, hit.hit_num]]
                      hit_as_struct = Ms::ErrorRate::Qvalue::Mascot::MascotPeptideHit.new(base_no_ext, query.title, query.charge, hit.sequence, hit.score, qval)
                      hits_by_query_title[hit_as_struct.query_title] << hit_as_struct
                    end
                  end
                end
              end
            end

            final_hits = []
            hits_by_query_title.each do |title, hits|
              best_hit =
                if hits.size == 1
                  hits.first
                else
                  hits.sort_by(&:mowse).last
                end
              # FILTER HERE:
              # ONLY TAKE the BEST HIT IF it passes any filters
              if min_pep_len
                next unless best_hit.sequence.size >= min_pep_len
              end
              final_hits << best_hit
            end
            final_hits
          end

        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'ms/error_rate/qvalue'
2
+ require 'ms/mascot/dat'
3
+
4
module Ms
  module ErrorRate
    module Qvalue
      module Mascot
      end
    end
  end
end


module Ms::ErrorRate::Qvalue::Mascot
  MEMBERS = [:filename, :query_title, :charge, :sequence, :mowse, :qvalue]

  # One peptide hit per query; serializes to YAML as a plain array of its
  # member values rather than as a Struct object.
  MascotPeptideHit = Struct.new(*MEMBERS) do
    def to_yaml(*args)
      to_a.to_yaml(*args)
    end
  end

  module_function

  # Returns an array of MascotPeptideHit structs
  # (:filename, :query_title, :charge, :sequence, :mowse, :qvalue) for the
  # target hits, each with its qvalue filled in from a target-decoy
  # calculation sorted on mowse score.
  #
  # For both the target and the decoy files, only the best (highest mowse)
  # hit per query title is kept, which should ensure one hit per scan.
  #
  # opts =
  #     :min_peptide_length => Integer  (best hits with a shorter sequence
  #                                      are dropped)
  # opts is also handed on to Ms::ErrorRate::Qvalue.target_decoy_qvalues.
  def qvalues(target_files, decoy_files, opts={})
    min_pep_len = opts[:min_peptide_length]

    target_hits, decoy_hits = [target_files, decoy_files].map do |files|
      grouped = Hash.new { |hash, title| hash[title] = [] }
      files.each do |file|
        run_name = File.basename(file, '.*')
        Ms::Mascot::Dat.open(file) do |dat|
          dat.each_peptide_hit(:by => :top, :yield_nil => false, :with_query => true) do |hit, query|
            record = MascotPeptideHit.new(run_name, query.title, query.charge, hit.sequence, hit.score)
            grouped[record.query_title] << record
          end
        end
      end

      kept = []
      grouped.each_value do |candidates|
        top = candidates.sort_by(&:mowse).last
        # the only filter: minimum peptide length, applied to the best hit
        next if min_pep_len && !(top.sequence.size >= min_pep_len)
        kept << top
      end
      kept
    end

    scored = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_hits, decoy_hits, opts, &:mowse)
    scored.map do |hit, qval|
      hit.qvalue = qval
      hit
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'ms/error_rate/qvalue'
2
+
3
module Ms ; end
module Ms::ErrorRate ; end
module Ms::ErrorRate::Qvalue ; end

module Ms::ErrorRate::Qvalue::Pepxml
  module_function

  # Reads every search_hit from a target and a decoy pepxml file and returns
  # whatever Ms::ErrorRate::Qvalue.target_decoy_qvalues returns for them
  # (hit and qvalue pairs).
  #
  # Each hit is a Struct with :aaseq (the 'peptide' attribute), :charge (the
  # 'assumed_charge' attribute two levels above the search_hit, as an
  # Integer) and one member per search_score child, named after its 'name'
  # attribute and cast to Float. The score names are taken from the first
  # search_hit encountered.
  #
  # The caller must provide a sort_by block under which the best hits sort
  # last. Only opt[:z_together] is passed on.
  def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
    score_names = []
    names_collected = false
    parse_flags = Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS

    rows_per_file = [target_pepxml, decoy_pepxml].map do |path|
      File.open(path) do |io|
        doc = Nokogiri::XML.parse(io, nil, nil, parse_flags)
        # simpler to drop namespaces than to carry them through the xpath
        doc.remove_namespaces!
        doc.root.xpath('//search_hit').map do |search_hit|
          peptide = search_hit['peptide']
          charge = search_hit.parent.parent['assumed_charge'].to_i
          score_values = []
          search_hit.children.each do |child|
            next unless child.name == 'search_score'
            score_names << child['name'].to_sym unless names_collected
            score_values << child['value'].to_f
          end
          names_collected = true
          [peptide, charge, *score_values]
        end
      end
    end

    hit_class = Struct.new(:aaseq, :charge, *score_names)
    t_hits, d_hits = rows_per_file.map do |rows|
      rows.map { |row| hit_class.new(*row) }
    end

    Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
  end
end
@@ -0,0 +1,93 @@
1
+
2
+ require 'set'
3
+ require 'ms/error_rate/decoy'
4
+
5
+
6
class Array
  # Backfill for Ruby versions whose Array lacks group_by: returns a Hash
  # from each block result to the array of elements that produced it.
  unless [].respond_to?(:group_by)
    def group_by(&block)
      groups = Hash.new { |hash, key| hash[key] = [] }
      each { |item| groups[block.call(item)] << item }
      groups
    end
  end
end
15
+
16
module Ms

  module ErrorRate
    # For generating and working with q-value calculations. The q-value is
    # the global false discovery rate when accepting that particular ID. We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to
    # prefer Storey when possible] ). The main point is that we sort and
    # threshold based on a global FDR.
    module Qvalue
      module_function

      # Returns an array of [target_hit, qvalue] pairs, best hit first
      # (within each charge group when hits are split by charge).
      #
      # The sort block should sort from worst to best; if no block is given,
      # hits are sorted on hit.score.
      #
      # opts (also passed through to mixed_target_decoy):
      #     :z_together => true/false (default false)
      #
      # NOTE(review): as implemented, a falsy :z_together (the default)
      # computes q-values over all hits pooled together, while a truthy
      # value computes them separately for each hit.charge. That is the
      # opposite of what the option name suggests; behavior is left
      # untouched here -- confirm the intent before changing it.
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new(target_hits)

        # Proc.new doesn't do arity checking
        hit_with_qvalue_pairs = Proc.new do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          # block-local names: do not overwrite the method's target_hits
          (targets, qvalues) = Ms::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          targets.zip(qvalues)
        end

        all_together = target_hits + decoy_hits
        if !opts[:z_together]
          hit_with_qvalue_pairs.call(all_together)
        else
          all_hits = []
          all_together.group_by(&:charge).each do |_charge, hits|
            # concat instead of push(*list): no giant argument splat
            all_hits.concat(hit_with_qvalue_pairs.call(hits))
          end
          all_hits
        end
      end

      # Returns [target_hits, qvalues] (parallel arrays sorted from best hit
      # to worst hit). Expects an array-like object of hits sorted from best
      # to worst hit with decoys interspersed, and a target_setlike object
      # that responds to :include? for the hit object. A hit is assumed to
      # be a decoy if it is not found in the target set!
      #
      # opts:
      #     :monotonic => true/false (default true). If false, the guarantee
      #                   that qvalues be monotonically increasing from best
      #                   to worst hit is not enforced.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            precision = Ms::ErrorRate::Decoy.precision(num_target, num_decoy)
            target_hits << hit
            qvalues << (1.0 - precision)
          else
            num_decoy += 1
          end
        end
        if opts[:monotonic]
          # walk from worst to best, carrying the smallest value seen so far
          min_qvalue = qvalues.last
          qvalues = qvalues.reverse.map do |val|
            min_qvalue = val unless min_qvalue < val
            min_qvalue
          end.reverse
        end
        [target_hits, qvalues]
      end


    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'ms/error_rate/sbv'
2
+
3
module Ms
  module ErrorRate
    class Sbv
      # Constraints on the aaseq attribute of peptides (the bare amino acid
      # sequence).
      class PeptideBased

        # Delegates to Ms::ErrorRate::Sbv.generate_hashes with the type code
        # "aa_min<min_num>", scoring each peptide 1 or 0:
        #
        # * min_num == 1: 1 when the peptide includes aa, else 0
        # * otherwise:    1 when at least min_num characters of the peptide
        #                 equal aa, else 0
        #
        # Returns whatever Sbv.generate_hashes returns.
        def self.generate_hashes(pep_to_prot_file, aa="C", min_num=1 )
          type_code = "aa_min#{min_num}"
          Ms::ErrorRate::Sbv.generate_hashes(pep_to_prot_file, :type_code => type_code) do |pep|
            qualifies =
              if min_num == 1
                pep.include?(aa)
              else
                pep.each_char.count { |residue| residue == aa } >= min_num
              end
            qualifies ? 1 : 0
          end
        end

      end # class
    end # Sbv
  end # ER
end # Ms
30
+
@@ -0,0 +1,39 @@
1
+ require 'ms/fasta'
2
+ require 'ms/error_rate/sbv'
3
+ require 'transmembrane'
4
+
5
module Ms
  module ErrorRate
    class Sbv
      module ProteinBased
        # value reported for a peptide none of whose proteins has a known value
        DEFAULT_NO_PROTS_VAL = 0.0

        # Delegates to Ms::ErrorRate::Sbv.generate_hashes, scoring each
        # peptide with the mean of the non-nil values of its proteins (or
        # DEFAULT_NO_PROTS_VAL when none of them is known).
        #
        # Note the pep to prot hash has proteins in a string separated by a
        # hyphen.
        #
        # protid_to_val:: hash from protein id to value; passed on as
        #                 :protein_hash
        # options::       passed on to Sbv.generate_hashes; :type_code
        #                 defaults to 'tm'. The caller's hash is not
        #                 modified.
        #
        # Returns what Sbv.generate_hashes returns (the names of the files
        # written).
        def self.generate_hashes(pep_to_prot_file, protid_to_val, options={})
          # work on a copy so the caller's (possibly frozen) hash is untouched
          opts = options.merge(:protein_hash => protid_to_val)
          opts[:type_code] ||= 'tm'

          Ms::ErrorRate::Sbv.generate_hashes(pep_to_prot_file, opts) do |prot_return_vals|
            known = prot_return_vals.compact
            if known.empty?
              DEFAULT_NO_PROTS_VAL
            else
              known.inject(0) { |sum, val| sum + val }.to_f / known.size
            end
          end
        end # end method
      end # module
    end # class
  end # ErrorRate
end # Ms
39
+
@@ -0,0 +1,111 @@
1
+
2
+
3
require 'yaml' # generate_hashes serializes with Hash#to_yaml

module Ms
  module ErrorRate
    # Sample Bias Validator
    class Sbv
      LENGTH_EXT = 'freq_by_length'
      AASEQ_EXT = 'by_aaseq'

      # Scores every peptide listed in pep_to_prot_file and writes two files
      # next to it:
      #
      # 1. <base>.<type_code>.<aaseq_ext><file_ext>: one "peptide: value"
      #    line per peptide
      # 2. <base>.<type_code>.<length_ext><file_ext>: YAML hash from peptide
      #    length to the average value of peptides of that length
      #
      # Each input line has the form "PEPTIDE: prot1-prot2-...". If a
      # :protein_hash is given, the block is yielded an array of the values
      # found by keying each protein of the peptide into that hash (nil for
      # unknown proteins). Otherwise, each peptide is yielded in turn. The
      # block must return a number. Blank lines are skipped.
      #
      # opts (defaults shown):
      #     :aaseq_ext      => AASEQ_EXT
      #     :length_ext     => LENGTH_EXT
      #     :file_ext       => '.yml'
      #     :type_code      => ''
      #     :protein_hash   => nil
      #     :stderr_counter => true   (progress output on $stderr)
      #
      # Returns the names of the two files written.
      def self.generate_hashes(pep_to_prot_file, opts={})
        op = { :aaseq_ext => AASEQ_EXT,
          :length_ext => LENGTH_EXT,
          :file_ext => '.yml',
          :type_code => '',
          :protein_hash => nil,
          :stderr_counter => true,
        }.merge(opts)

        base = pep_to_prot_file.chomp(File.extname(pep_to_prot_file))
        freqs = Hash.new {|h,k| h[k] = 0.0 }
        counts = Hash.new {|h,k| h[k] = 0 }
        (fileout1, fileout2) = [:aaseq_ext, :length_ext].map do |type_ext|
          base + '.' + op[:type_code] + '.' + op[type_ext] + op[:file_ext]
        end
        protein_hash = op[:protein_hash]
        pep_count = 0
        if op[:stderr_counter]
          $stderr.print "[working, 100,000 peptides = '.'] "
          $stderr.flush
        end
        File.open(fileout1 , 'w') do |out|
          IO.foreach(pep_to_prot_file) do |line|
            # chomp (not chomp!): chomp! returns nil when the final line has
            # no trailing newline
            line = line.chomp
            next if line.strip.empty?
            (pep, prot_string) = line.split(': ')

            answ =
              if protein_hash
                yield( protein_hash.values_at(*(prot_string.split('-'))) )
              else
                yield(pep)
              end
            out.puts "#{pep}: #{answ}"
            freqs[pep.size] += answ
            counts[pep.size] += 1
            pep_count += 1
            if pep_count % 100000 == 0 && op[:stderr_counter]
              $stderr.print '.'
              $stderr.flush
            end
          end
        end
        $stderr.puts "DONE!" if op[:stderr_counter]
        avg_freq_ar = {}
        freqs.each do |k,v|
          avg_freq_ar[k] = v / counts[k]
        end
        File.open(fileout2, 'w') {|out| out.print avg_freq_ar.to_yaml }
        [fileout1, fileout2]
      end


      # a hash by aaseq giving a value between 0 and 1 telling how much of
      # an indicator the hit is
      attr_accessor :indicator_by_aaseq

      attr_accessor :frequency_indicator_opposite

      # hash from peptide length to indicator frequency
      attr_accessor :size_to_freq

      # boolean
      attr_accessor :indicators_signify_true_hit


      def initialize(indicator_by_aaseq_hash, size_to_freq, frequency_indicator_opposite, indicators_signify_true_hit=true)
        @indicators_signify_true_hit = indicators_signify_true_hit
        @frequency_indicator_opposite = frequency_indicator_opposite
        @indicator_by_aaseq = indicator_by_aaseq_hash
        @size_to_freq = size_to_freq
        # running totals maintained by update_precision
        @tot_num_indicators = 0.0
        @tot_num = 0
        @frequency_of_indicators_sum = 0.0
      end

      # Adds one hit (by its amino acid sequence) to the running totals and
      # returns the cumulative precision (fraction of true positives among
      # total hits). The frequency looked up in size_to_freq is the
      # probability that a generic amino acid sequence of that length will
      # be an indicator.
      #
      # aaseq must be a key of indicator_by_aaseq and its length a key of
      # size_to_freq.
      #
      # NOTE(review): the expression below is carried over unchanged from
      # the original; its derivation is not documented -- confirm before
      # relying on the numbers.
      def update_precision(aaseq)
        @tot_num_indicators += indicator_by_aaseq[aaseq]
        @tot_num += 1
        @frequency_of_indicators_sum += @size_to_freq[aaseq.size]
        # FP Indicator
        value = @tot_num_indicators * (1.0 - @frequency_indicator_opposite) * @frequency_of_indicators_sum / (@tot_num**2)
        if @indicators_signify_true_hit
          value # a true indicator type (gives precision)
        else # false indicator type
          1 - value # 1 - fdr == precision
        end
      end

      # Placeholder: returns @aaseq_to_fraction, which nothing in this class
      # assigns.
      def calculate_background_frequency
        @aaseq_to_fraction
      end

    end

  end
end
@@ -0,0 +1,9 @@
1
+
2
+ require 'ms/error_rate/sbv'
3
+ require 'ms/error_rate/decoy'
4
+
5
# Namespace declarations only; the contents come from the files required
# by this file.
module Ms
  module ErrorRate; end
end
9
+