ms-error_rate 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright shared among contributing institutions:
2
+ Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
+ Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = {ms-error_rate}[http://mspire.rubyforge.org/projects/ms-error_rate]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library for calculating or dealing
4
+ with error rates. These may be from target-decoy searches, sample bias
5
+ validation, or other sources.
6
+
7
+ == Examples
8
+
9
+ === Target-Decoy with Mascot
10
+
11
+ Generate q-values (right now only with Mascot and MascotPercolator):
12
+
13
+ require 'ms/error_rate/qvalue'
14
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues(target_files, decoy_files)
15
+ # each target hit is a PeptideHit Struct (:filename, :query_title, :charge, :sequence, :mowse, :qvalue)
16
+
17
+ # or on the commandline:
18
+ % qvalues.rb <target>.dat <decoy>.dat
19
+
20
+ The same output can be produced from Mascot-Percolator output:
21
+
22
+ require 'ms/error_rate/qvalue'
23
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues(datp_files, tab_dot_text_files)
24
+ # or commandline:
25
+ % qvalues.rb <target>.datp <target>.tab.txt
26
+
27
+ === Sample Bias Validation
28
+
29
+ Sample Bias Validation allows error rate determination based on expected biases in sample composition. Here is an example using transmembrane sequence content. We will assume a fasta file called `proteins.fasta`:
30
+
31
+ # create a peptide-centric database
32
+ fasta_to_peptide_centric_db.rb proteins.fasta # defaults: 2 missed cleavages, min aaseq 4
33
+ # generates a file: proteins.msd_clvg2.min_aaseq4.yml
34
+
35
+ # create a transmembrane sequence prediction file
36
+ fasta_to_phobius.rb proteins.fasta # => generates proteins.phobius
37
+
38
+ generate_sbv_input_hashes.rb proteins.msd_clvg2.min_aaseq4.yml --tm proteins.phobius,1
39
+ # creates two files:
40
+ # proteins.msd_clvg2.min_aaseq4.tm_min1.by_aaseq.yml
41
+ # proteins.msd_clvg2.min_aaseq4.tm_min1.freq_by_length.yml
42
+
43
+ # cytosolic fraction (transmembrane sequences not expected):
44
+ error_rate qvalues.yml --fp-sbv proteins.msd_clvg2.min_aaseq4.tm_min1.by_aaseq.yml,\
45
+ proteins.msd_clvg2.min_aaseq4.tm_min1.freq_by_length.yml,0.05
46
+
47
+ == Installation
48
+
49
+ gem install ms-error_rate
50
+
51
+ == Copyright
52
+
53
+ See LICENSE
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.7
data/bin/error_rate ADDED
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'support/sort_by_attributes'
4
+ require 'ms/error_rate'
5
+ require 'optparse'
6
+
7
+ @num_tp_validators = 0
8
+ @num_fp_validators = 0
9
+
10
+
11
+ arg_strings_ar = [true,false].map do |boolean|
12
+ ["a false positive indicator", "A = aaseq to indictor (0-1) yml file", "B = aaseq_length to frequency yml file", "C = rate indicators are #{boolean} pos.", "D = name of the validator"]
13
+ end
14
+
15
+ op_sbv_ars = [true,false].zip(arg_strings_ar).map do |boolean, arg_strings|
16
+ letter = boolean ? 't' : 'f'
17
+ ["--#{letter}p-sbv <A,B,C[,D]>,", Array, *arg_strings]
18
+ end
19
+
20
+ raw_validator_args = []
21
+ validator_names = []
22
+
23
+ opt = {
24
+ :order_by => [:qvalue],
25
+ }
26
+
27
+ opts = OptionParser.new do |op|
28
+ op.banner = "usage: #{File.basename(__FILE__)} qvalues.yml ..."
29
+
30
+ op.on("--order-by <Array>", Array, "the keys to order on (default: [qvalue])") {|v| opt[:order_by] = v.map {|v| v.to_sym } }
31
+ op.on("--best-is-low <Array>", Array, "the keys where better score is lower") {|v| opt[:best_is_low] = v.map {|v| v.to_sym } }
32
+
33
+ [true, false].each do |boolean|
34
+ index = boolean ? 0 : 1
35
+ op.on(*op_sbv_ars[index]) do |v|
36
+ (a,b,c,d) = v
37
+ name =
38
+ if d
39
+ d
40
+ else
41
+ if boolean
42
+ @num_tp_validator += 1
43
+ "tp#{@num_tp_validator}"
44
+ else
45
+ @num_fp_validator += 1
46
+ "fp#{@num_fp_validators}"
47
+ end
48
+ end
49
+ validator_names << name
50
+ raw_validator_args << [a,b,c]
51
+ end
52
+ end
53
+ end
54
+
55
+ opts.parse!
56
+
57
+ # exactly one qvalues.yml file is expected after option parsing; otherwise print the usage
58
+ if ARGV.size != 1
59
+ puts opts.to_s
60
+ exit
61
+ elsif !opt[:order_by]
62
+ puts "you must specify the order-by array!"
63
+ exit
64
+ end
65
+ # NOTE: sort_args is the same Array object as opt[:order_by], so the << below also extends opt[:order_by]
66
+ sort_args = opt[:order_by]
67
+ sort_args << {:down => opt[:best_is_low] } # because we will sort normal and reverse the array
68
+
69
+
70
+ # load one validator at a time
71
+
72
+ raw_validator_args.zip(validator_names) do |args, name|
73
+ (a,b,c) = args
74
+ val = Ms::ErrorRate::Sbv.new(YAML.load_file(a), YAML.load_file(b), c)
75
+
76
+ ARGV.each do |file|
77
+ yaml = YAML.load_file(file)
78
+ pepclass = Struct.new(yaml['headers'].map {|v| v.to_sym })
79
+ peps = yaml['data'].each do |ar|
80
+ pepclass.new(*ar)
81
+ end
82
+ sorted_best_to_worst = peps.sort_by_attributes(sort_args)
83
+
84
+ precision_vals = sorted_best_to_worst.map do |pep|
85
+ val.update_precision(pep.aaseq)
86
+ end
87
+
88
+ end
89
+ p precision_vals
90
+ end
91
+
92
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/ruby
2
+
3
+
4
+ if ARGV.size == 0
5
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta"
6
+ puts "output: <file>"
7
+ #puts "WARNING!!: you need to run phobius_to_nontransmembrane.rb before"
8
+ #puts "this to weed out transmembrane proteins!"
9
+ exit
10
+ end
11
+
12
+
13
+
14
+
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'ms/ident/peptidedb'
6
+ # all option handling and work is delegated to Ms::Ident::Peptidedb.cmdline
7
+ Ms::Ident::Peptidedb.cmdline(ARGV)
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'mechanize'
4
+
5
+ page = 'http://phobius.sbc.su.se/'
6
+
7
+ if ARGV.size == 0
8
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta"
9
+ puts "outputs <file>.phobius "
10
+ puts "in short format"
11
+ exit
12
+ end
13
+
14
+
15
+ a = WWW::Mechanize.new { |agent|
16
+ agent.user_agent_alias = 'Mac Safari'
17
+ }
18
+
19
+ ARGV.each do |file|
20
+ outfile = file.chomp(File.extname(file)) + '.phobius'
21
+ a.get(page) do |page|
22
+ form = page.forms.first
23
+ form.radiobuttons.select {|v| v.value == 'short' }.first.click
24
+ fu = form.file_uploads.first
25
+ fu.file_name = File.expand_path(file)
26
+ #fu.file_data = IO.read(file)
27
+ reply = form.submit
28
+ html = reply.body
29
+ start = html.index("<pre>") + 5
30
+ stop = html.rindex("</pre>")
31
+ File.open(outfile, 'w') {|out| out.print html[start...stop] }
32
+ end
33
+ end
34
+
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+
5
+ require 'ms/error_rate/sbv/peptide_based'
6
+ require 'ms/error_rate/sbv/protein_based'
7
+
8
+ opt = {}
9
+ opt[:protein_bias] = []
10
+
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} peptide_centric_db [OPTION]"
13
+ op.on("--tm <phobius,min>", Array, "transmembrane, <phobius> is path to phobius ", "output file (see fasta_to_phobius.rb)", "<min> is the min number of tm sequences required") {|v| opt[:tm] = [v.first, v.last.to_i]}
14
+ op.on("--aa <aa,min>", Array, "amino acid, <aa> is a string found in the peptides", "<min> is the min number of required for counting") {|v| opt[:aa] = [v.first, v.last.to_i]}
15
+ op.on("--protein-bias <name,file>", Array, "<name> bias, <file> is path to a yaml hash", " keyed prot -> <0-1>") {|v| opt[:protein_bias] << [v.first.to_sym, v.last]}
16
+ op.separator "outputs for each bias type:"
17
+ op.separator " <peptide_centric_db>.<info>.#{Ms::ErrorRate::Sbv::LENGTH_EXT}"
18
+ op.separator " <peptide_centric_db>.<info>.#{Ms::ErrorRate::Sbv::AASEQ_EXT}"
19
+ end
20
+
21
+ opts.parse!
22
+
23
+ if ARGV.size == 0
24
+ puts opts.to_s
25
+ exit
26
+ end
27
+
28
+ peptide_centric_db = ARGV.first
29
+
30
+ def note_files(files)
31
+ files.each do |file| puts "WROTE: #{file}" end
32
+ end
33
+
34
+ klass = Ms::ErrorRate::Sbv
35
+ prot_klass = Ms::ErrorRate::Sbv::ProteinBased
36
+ pep_klass = Ms::ErrorRate::Sbv::PeptideBased
37
+
38
+ if opt[:tm]
39
+ index = TransmembraneIndex.new(opt[:tm].first)
40
+ # protein id => 1 when its certain transmembrane segment count reaches <min>, else 0
41
+ protid_to_transmembrane = {}
42
+ regexp = nil
43
+ index.each do |k,v|
44
+ regexp ||= Ms::Fasta.id_regexp(k)  # built once from the first key, then reused for every key
45
+ new_key = regexp.match(k)[1]
46
+ protid_to_transmembrane[new_key] = ((v[:num_certain_transmembrane_segments] >= opt[:tm].last) ? 1 : 0)
47
+ end
48
+ # NOTE(review): TransmembraneIndex and Ms::Fasta are not required directly by this script — presumably loaded via the sbv requires; confirm
49
+ fnames = prot_klass.generate_hashes( peptide_centric_db, protid_to_transmembrane, {:type_code => "tm_min#{opt[:tm].last}"})
50
+ note_files fnames
51
+ end
52
+
53
+ if opt[:aa]
54
+ fnames = pep_klass.generate_hashes( peptide_centric_db, *opt[:aa] )
55
+ note_files fnames
56
+ end
57
+
58
+ if opt[:protein_bias].size > 0
59
+ opt[:protein_bias].each do |name, hash_file|
60
+ prot_klass.generate_hashes( peptide_centric_db, hash_file)
61
+ end
62
+ end
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'trollop'
4
+ require 'nokogiri'
5
+ require 'set'
6
+
7
+ require 'ms/error_rate/qvalue'
8
+
9
+ opts = Trollop::Parser.new do
10
+ banner %Q{usage: #{File.basename(__FILE__)} <fwd>.xml <decoy>.xml ...
11
+ outputs: <fwd>.phq.csv
12
+ phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
13
+ }
14
+ opt :z_together, "do not group by charge state", :default => false
15
+ end
16
+
17
+ DELIMITER = "\t"
18
+
19
+ opt = opts.parse(ARGV)
20
+ if ARGV.size == 0 || (ARGV.size%2 != 0)
21
+ puts "\n\n!! only even numbers of files accepted (target decoy target decoy ...) !!\n\n" if (ARGV.size%2 != 0)
22
+ opts.educate
23
+ exit
24
+ end
25
+
26
+ files = ARGV.to_a
27
+
28
+ PeptideHit = Struct.new(:aaseq, :charge, :ionscore, :qvalue)
29
+
30
+ # this is a list of high quality peptide hits associated with each group
31
+ peptide_hits_per_file = files.map do |file|
32
+ File.open(file) do |io|
33
+ doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
34
+ # we can work with namespaces, or just remove them ...
35
+ doc.remove_namespaces!
36
+ root = doc.root
37
+ search_hits = root.xpath('//search_hit')
38
+ search_hits.map do |search_hit|
39
+ aaseq = search_hit['peptide']
40
+ ionscore = search_hit.children.find {|node| node.name == 'search_score' && node['name'] == 'ionscore' }['value'].to_f
41
+ charge = search_hit.parent.parent['assumed_charge'].to_i
42
+ PeptideHit.new(aaseq, charge, ionscore)
43
+ end
44
+ end
45
+ end
46
+
47
+ hits_per_target = peptide_hits_per_file.each_slice(2).map do |target_hits, decoy_hits|  # files come as target, decoy pairs
48
+ pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_hits, decoy_hits, :z_together => opt[:z_together], &:ionscore)
49
+ target_peptide_hits = pairs.map {|peptide_hit, qvalue| peptide_hit.qvalue = qvalue ; peptide_hit }  # store each q-value on its hit
50
+ end
51
+ # write one tab-delimited <target>.phq.tsv per pair, named after the target file
52
+ files.each_slice(2).map(&:first).zip(hits_per_target) do |file, hits|
53
+ newfile = file.chomp(File.extname(file)) + ".phq.tsv"
54
+ File.open(newfile,'w') do |out|
55
+ out.puts %w(aaseq charge qvalue).join(DELIMITER)
56
+ hits.each do |hit|
57
+ out.puts hit.values_at(0,1,3).join(DELIMITER)  # struct members 0, 1, 3 = aaseq, charge, qvalue
58
+ end
59
+ end
60
+ end
61
+
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'ms/fasta'
4
+ require 'transmembrane/phobius.rb'
5
+
6
+ if ARGV.size != 3
7
+ puts "usage: #{File.basename(__FILE__)} <max#tm> phobius_file_short <file>.fasta"
8
+ puts "max#tm = max # of transmembrane sequences allowed to be a non-transmembrane."
9
+ puts ""
10
+ puts "outputs: <file>_NONTM.fasta"
11
+ exit
12
+ end
13
+
14
+ (max_num_tm, phobius_short_file, fasta_db_file) = ARGV
15
+ max_num_tm = max_num_tm.to_i
16
+
17
+ base = fasta_db_file.chomp(File.extname(fasta_db_file))
18
+ outfile = base + "_NONTM.fasta"
19
+
20
+ index = Phobius::Index.new(phobius_short_file)
21
+
22
+ File.open(outfile, 'w') do |out|
23
+ Ms::Fasta.open(fasta_db_file) do |fasta|
24
+ fasta.each do |entry|
25
+ key = index.reference_to_key(entry.header)
26
+ abort "can't find key: #{key} for #{entry.header}" unless index.key?(key)
27
+ num_tms = index[key][:num_certain_transmembrane_segments]
28
+ if num_tms <= max_num_tm
29
+ out.print entry.to_s
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+
data/bin/qvalues.rb ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+ require 'ms/error_rate/qvalue'
5
+
6
+ DEF_EXT = "_flip"
7
+ NORMAL_EXT = 'qval.yml'
8
+
9
+ def print_out(outfile, filenames, headers, target_hits)
10
+ File.open(outfile, 'w') do |out|
11
+ out.print( {'headers' => headers, 'filenames' => filenames, 'data' => target_hits }.to_yaml )
12
+ end
13
+ end
14
+
15
+ opt = {
16
+ :outfile => NORMAL_EXT,
17
+ :min_peptide_length => 9,
18
+ }
19
+
20
+ opts = OptionParser.new do |op|
21
+ op.banner = "usage: #{File.basename(__FILE__)} <target> <decoy> [... (as pairs)]"
22
+ op.separator "or: #{File.basename(__FILE__)} <target>.datp <target>.tab.txt [... (as pairs)]"
23
+ op.separator "for each pair of files"
24
+ op.separator "sorts the peptide hits by score and determines the precision at each hit"
25
+ op.separator ""
26
+ op.separator "writes a yaml file <target>.'#{NORMAL_EXT}' which"
27
+ op.separator "has three keys: 'headers', 'filenames', and 'data'"
28
+ op.separator " headers contains an array showing what is in the data"
29
+ op.separator " filenames: (a hash with two keys holding an array of full path names)"
30
+ op.separator " target:"
31
+ op.separator " decoy:"
32
+ op.separator " data: (an array with the data values)"
33
+ op.separator "headers: <the headers of the hits>"
34
+ op.separator ""
35
+ op.separator "headers guaranteed to have at least: filename, query_title, charge, sequence, qvalue"
36
+ op.separator ""
37
+ op.on("-l", "--min-peptide-length <Int>", Integer, "min num aa's to accept (default: #{opt[:min_peptide_length]})") {|v| opt[:min_peptide_length] = v }
38
+ op.on("--z-together", "combines all charge states for precision calc") {|v| opt[:z_together] = v }
39
+ op.on("-o", "--outfile <name>", "write to specified file") {|v| opt[:outfile] = v }
40
+ op.on("-g", "--group-together", "process all forwards together and all decoys together", "will output to opt[:outfile] unless -o given") {|v| opt[:group_together] = v }
41
+ op.on("-f", "--find-decoy [ext]", "finds the decoy file, default <file>#{DEF_EXT}.<ext>", "obviating the need to specify it on the commandline") do |v|
42
+ if v.is_a? String
43
+ opt[:find_decoy] = v
44
+ else
45
+ opt[:find_decoy] = DEF_EXT
46
+ end
47
+ end
48
+ end
49
+
50
+ opts.parse!
51
+
52
+ if ARGV.size == 0
53
+ puts opts.to_s
54
+ exit
55
+ end
56
+
57
+ target_files = []
58
+ decoy_files = []
59
+ if opt[:find_decoy]
60
+ # every argument is a target; its decoy is <base><suffix><ext> and must already exist
61
+ target_files = ARGV.to_a.dup
62
+ decoy_files = target_files.map do |tf|
63
+ ext = File.extname(tf)
64
+ decoy_file = tf.chomp(ext) + opt[:find_decoy] + ext
65
+ raise ArgumentError, "cannot find #{decoy_file}" unless File.exist?(decoy_file)
66
+ decoy_file
67
+ end
68
+ else
69
+ abort "files must be given as <target> <decoy> pairs (got #{ARGV.size} files)" unless ARGV.size % 2 == 0
70
+ ARGV.each_slice(2) do |target, decoy|
71
+ target_files << target ; decoy_files << decoy
72
+ end
73
+ end
74
+
75
+ require 'ms/error_rate/qvalue/mascot'
76
+ require 'ms/error_rate/qvalue/mascot/percolator'
77
+
78
+ mascot_percolator = (File.extname(target_files.first) == '.datp')
79
+ headers = Ms::ErrorRate::Qvalue::Mascot::MEMBERS.map(&:to_s)
80
+ if opt[:group_together]
81
+ outfile = opt[:outfile]
82
+ if mascot_percolator
83
+ filenames = { 'target' => target_files, 'decoy' => decoy_files }
84
+ # in the case of mascot_percolator, the "target" files are .datp files and
85
+ # "decoy" files the .tab.txt files
86
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues( target_files, decoy_files, opt).sort_by(&:qvalue)
87
+ else
88
+ filenames = { 'target' => target_files, 'decoy' => decoy_files }
89
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues(target_files, decoy_files, opt).sort_by(&:qvalue)
90
+ end
91
+ print_out(outfile, filenames, headers, target_hits)
92
+ else
93
+ target_files.zip(decoy_files) do |target_file, decoy_file|
94
+ if mascot_percolator
95
+ filenames = { 'datp' => [target_file], 'tab_txt' => [decoy_file] }
96
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues([target_file], [decoy_file], opt).sort_by(&:qvalue)
97
+ else
98
+ filenames = { 'target' => [target_file], 'decoy' => [decoy_file] }
99
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues([target_file], [decoy_file], opt).sort_by(&:qvalue)
100
+ end
101
+ base = target_file.chomp(File.extname(target_file))
102
+ outfile = base + '.' + NORMAL_EXT
103
+ print_out(outfile, filenames, headers, target_hits)
104
+ end
105
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate/qvalue'
4
+
5
+ Hit = Struct.new(:score, :charge)
6
+
7
+ describe 'calculating q-values' do
8
+
9
+ before do
10
+ scores = [14,15,13,12,11]
11
+ qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]
12
+ @target_hits = scores.zip(Array.new(scores.size, 2)).map {|pair| Hit.new(*pair) }
13
+ @decoy_hits = scores.zip(Array.new(scores.size, 2)).map {|pair| Hit.new(pair.first-0.5, pair.last) }
14
+ @qval_by_hit = {}
15
+ @target_hits.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
16
+ end
17
+
18
+ it 'can calculate qvalues on target deccoy sets' do
19
+ pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
20
+ pairs.each do |hit, qval|
21
+ @qval_by_hit[hit].should.be.close(qval, 0.00000001)
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate'
4
+ require 'ostruct'
5
+
6
+ xdescribe 'not quite sure what this is' do
7
+
8
+ it 'calculates bayesian probabilities' do
9
+ # C = is a correct ID
10
+ # T = transmembrane content
11
+ # Y = cysteine content
12
+ # A = abundance
13
+ # p(C|T,Y,A) = p(T|C)p(Y|C)p(A|C)p(C) / p(T)p(Y)p(A)
14
+ peps.map do |pep|
15
+ # what is the probability of that un-transmembraneyness being correct?
16
+ # what is the probability of that un-cysteineness being correct?
17
+ # what is the probability of that high abundanceness being correct?
18
+ pep.bayes_probs.reduce(prob_being_correct) do |prob|
19
+ end
20
+ p_correct = pep.prior_prob_correct
21
+ pep.not_transmembrane? * pep.not_cysteine? * pep.not_low_abundance?
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'spec/more'
3
+
4
+ Bacon.summary_on_exit
5
+
6
+ TESTFILES = File.dirname(__FILE__) + "/testfiles"
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-error_rate
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 7
9
+ version: 0.0.7
10
+ platform: ruby
11
+ authors:
12
+ - John T. Prince
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-03-28 00:00:00 -06:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: ms-core
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 0
31
+ - 2
32
+ version: 0.0.2
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: ms-fasta
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 0
45
+ - 2
46
+ - 3
47
+ version: 0.2.3
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: spec-more
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ description: aids for creating and calculating error rates using target-decoy searches and sample validation.
64
+ email: jtprince@gmail.com
65
+ executables:
66
+ - error_rate
67
+ - fasta_to_nuclear.rb
68
+ - fasta_to_peptide_centric_db.rb
69
+ - fasta_to_phobius.rb
70
+ - generate_sbv_input_hashes.rb
71
+ - mascot_pepxml_to_peptide_hit_qvalues.rb
72
+ - phobius_to_nontransmembrane.rb
73
+ - qvalues.rb
74
+ extensions: []
75
+
76
+ extra_rdoc_files:
77
+ - LICENSE
78
+ - README.rdoc
79
+ files:
80
+ - VERSION
81
+ - LICENSE
82
+ - README.rdoc
83
+ - spec/ms/error_rate/qvalue_spec.rb
84
+ - spec/ms/error_rate_spec.rb
85
+ - spec/spec_helper.rb
86
+ - bin/error_rate
87
+ - bin/fasta_to_nuclear.rb
88
+ - bin/fasta_to_peptide_centric_db.rb
89
+ - bin/fasta_to_phobius.rb
90
+ - bin/generate_sbv_input_hashes.rb
91
+ - bin/mascot_pepxml_to_peptide_hit_qvalues.rb
92
+ - bin/phobius_to_nontransmembrane.rb
93
+ - bin/qvalues.rb
94
+ has_rdoc: true
95
+ homepage: http://jtprince.github.com/ms-error_rate
96
+ licenses: []
97
+
98
+ post_install_message:
99
+ rdoc_options: []
100
+
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ segments:
117
+ - 0
118
+ version: "0"
119
+ requirements: []
120
+
121
+ rubyforge_project: mspire
122
+ rubygems_version: 1.3.7
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: An mspire library for calculating error rates in MS/MS identifications (FDRs).
126
+ test_files:
127
+ - spec/ms/error_rate/qvalue_spec.rb
128
+ - spec/ms/error_rate_spec.rb
129
+ - spec/spec_helper.rb