ms-error_rate 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright shared among contributing institutions:
2
+ Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
+ Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = {ms-error_rate}[http://mspire.rubyforge.org/projects/ms-error_rate]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library for calculating or dealing
4
+ with error rates. These may be from target-decoy searches, sample bias
5
+ validation, or other sources.
6
+
7
+ == Examples
8
+
9
+ === Target-Decoy with Mascot
10
+
11
+ Generate q-values (right now only with Mascot and MascotPercolator):
12
+
13
+ require 'ms/error_rate/qvalue'
14
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues(target_files, decoy_files)
15
+ # each element of target_hits is a PeptideHit Struct (:filename, :query_title, :charge, :sequence, :mowse, :qvalue)
16
+
17
+ # or on the commandline:
18
+ % qvalues.rb <target>.dat <decoy>.dat
19
+
20
+ The same output can be produced from Mascot-Percolator output:
21
+
22
+ require 'ms/error_rate/qvalue'
23
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues(datp_files, tab_dot_text_files)
24
+ # or commandline:
25
+ % qvalues.rb <target>.datp <target>.tab.txt
26
+
27
+ === Sample Bias Validation
28
+
29
+ Sample Bias Validation allows error rate determination based on expected biases in sample composition. Here is an example using transmembrane sequence content. We will assume a fasta file called `proteins.fasta`:
30
+
31
+ # create a peptide-centric database
32
+ fasta_to_peptide_centric_db.rb proteins.fasta # defaults 2 missed cleavages, min aaseq 4
33
+ # generates a file: proteins.msd_clvg2.min_aaseq4.yml
34
+
35
+ # create a transmembrane sequence prediction file
36
+ fasta_to_phobius.rb proteins.fasta # => generates proteins.phobius
37
+
38
+ generate_sbv_input_hashes.rb proteins.msd_clvg2.min_aaseq4.yml --tm proteins.phobius,1
39
+ # creates two files:
40
+ # proteins.msd_clvg2.min_aaseq4.tm_min1.by_aaseq.yml
41
+ # proteins.msd_clvg2.min_aaseq4.tm_min1.freq_by_length.yml
42
+
43
+ # cytosolic fraction (transmembrane sequences not expected):
44
+ error_rate qvalues.yml --fp-sbv proteins.msd_clvg2.min_aaseq4.tm_min1.by_aaseq.yml,\
45
+ proteins.msd_clvg2.min_aaseq4.tm_min1.freq_by_length.yml,0.05
46
+
47
+ == Installation
48
+
49
+ gem install ms-error_rate
50
+
51
+ == Copyright
52
+
53
+ See LICENSE
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.7
data/bin/error_rate ADDED
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'support/sort_by_attributes'
4
+ require 'ms/error_rate'
5
+ require 'optparse'
6
+
7
+ @num_tp_validators = 0
8
+ @num_fp_validators = 0
9
+
10
+
11
+ arg_strings_ar = [true,false].map do |boolean|
12
+ ["a false positive indicator", "A = aaseq to indictor (0-1) yml file", "B = aaseq_length to frequency yml file", "C = rate indicators are #{boolean} pos.", "D = name of the validator"]
13
+ end
14
+
15
+ op_sbv_ars = [true,false].zip(arg_strings_ar).map do |boolean, arg_strings|
16
+ letter = boolean ? 't' : 'f'
17
+ ["--#{letter}p-sbv <A,B,C[,D]>,", Array, *arg_strings]
18
+ end
19
+
20
+ raw_validator_args = []
21
+ validator_names = []
22
+
23
+ opt = {
24
+ :order_by => [:qvalue],
25
+ }
26
+
27
+ opts = OptionParser.new do |op|
28
+ op.banner = "usage: #{File.basename(__FILE__)} qvalues.yml ..."
29
+
30
+ op.on("--order-by <Array>", Array, "the keys to order on (default: [qvalue])") {|v| opt[:order_by] = v.map {|v| v.to_sym } }
31
+ op.on("--best-is-low <Array>", Array, "the keys where better score is lower") {|v| opt[:best_is_low] = v.map {|v| v.to_sym } }
32
+
33
+ [true, false].each do |boolean|
34
+ index = boolean ? 0 : 1
35
+ op.on(*op_sbv_ars[index]) do |v|
36
+ (a,b,c,d) = v
37
+ name =
38
+ if d
39
+ d
40
+ else
41
+ if boolean
42
+ @num_tp_validator += 1
43
+ "tp#{@num_tp_validator}"
44
+ else
45
+ @num_fp_validator += 1
46
+ "fp#{@num_fp_validators}"
47
+ end
48
+ end
49
+ validator_names << name
50
+ raw_validator_args << [a,b,c]
51
+ end
52
+ end
53
+ end
54
+
55
+ opts.parse!
56
+
57
+
58
+ if ARGV.size != 1
59
+ puts opts.to_s
60
+ exit
61
+ elsif !opt[:order_by]
62
+ puts "you must specify the order-by array!"
63
+ exit
64
+ end
65
+
66
+ sort_args = opt[:order_by]
67
+ sort_args << {:down => opt[:best_is_low] } # because we will sort normal and reverse the array
68
+
69
+
70
+ # load one validator at a time
71
+
72
+ raw_validator_args.zip(validator_names) do |args, name|
73
+ (a,b,c) = args
74
+ val = Ms::ErrorRate::Sbv.new(YAML.load_file(a), YAML.load_file(b), c)
75
+
76
+ ARGV.each do |file|
77
+ yaml = YAML.load_file(file)
78
+ pepclass = Struct.new(yaml['headers'].map {|v| v.to_sym })
79
+ peps = yaml['data'].each do |ar|
80
+ pepclass.new(*ar)
81
+ end
82
+ sorted_best_to_worst = peps.sort_by_attributes(sort_args)
83
+
84
+ precision_vals = sorted_best_to_worst.map do |pep|
85
+ val.update_precision(pep.aaseq)
86
+ end
87
+
88
+ end
89
+ p precision_vals
90
+ end
91
+
92
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/ruby
2
+
3
+
4
+ if ARGV.size == 0
5
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta"
6
+ puts "output: <file>"
7
+ #puts "WARNING!!: you need to run phobius_to_nontransmembrane.rb before"
8
+ #puts "this to weed out transmembrane proteins!"
9
+ exit
10
+ end
11
+
12
+
13
+
14
+
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'ms/ident/peptidedb'
6
+
7
+ Ms::Ident::Peptidedb.cmdline(ARGV)
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'mechanize'
4
+
5
+ page = 'http://phobius.sbc.su.se/'
6
+
7
+ if ARGV.size == 0
8
+ puts "usage: #{File.basename(__FILE__)} <file>.fasta"
9
+ puts "outputs <file>.phobius "
10
+ puts "in short format"
11
+ exit
12
+ end
13
+
14
+
15
+ a = WWW::Mechanize.new { |agent|
16
+ agent.user_agent_alias = 'Mac Safari'
17
+ }
18
+
19
+ ARGV.each do |file|
20
+ outfile = file.chomp(File.extname(file)) + '.phobius'
21
+ a.get(page) do |page|
22
+ form = page.forms.first
23
+ form.radiobuttons.select {|v| v.value == 'short' }.first.click
24
+ fu = form.file_uploads.first
25
+ fu.file_name = File.expand_path(file)
26
+ #fu.file_data = IO.read(file)
27
+ reply = form.submit
28
+ html = reply.body
29
+ start = html.index("<pre>") + 5
30
+ stop = html.rindex("</pre>")
31
+ File.open(outfile, 'w') {|out| out.print html[start...stop] }
32
+ end
33
+ end
34
+
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+
5
+ require 'ms/error_rate/sbv/peptide_based'
6
+ require 'ms/error_rate/sbv/protein_based'
7
+
8
+ opt = {}
9
+ opt[:protein_bias] = []
10
+
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} peptide_centric_db [OPTION]"
13
+ op.on("--tm <phobius,min>", Array, "transmembrane, <phobius> is path to phobius ", "output file (see fasta_to_phobius.rb)", "<min> is the min number of tm sequences required") {|v| opt[:tm] = [v.first, v.last.to_i]}
14
+ op.on("--aa <aa,min>", Array, "amino acid, <aa> is a string found in the peptides", "<min> is the min number of required for counting") {|v| opt[:aa] = [v.first, v.last.to_i]}
15
+ op.on("--protein-bias <name,file>", Array, "<name> bias, <file> is path to a yaml hash", " keyed prot -> <0-1>") {|v| opt[:protein_bias] << [v.first.to_sym, v.last]}
16
+ op.separator "outputs for each bias type:"
17
+ op.separator " <peptide_centric_db>.<info>.#{Ms::ErrorRate::Sbv::LENGTH_EXT}"
18
+ op.separator " <peptide_centric_db>.<info>.#{Ms::ErrorRate::Sbv::AASEQ_EXT}"
19
+ end
20
+
21
+ opts.parse!
22
+
23
+ if ARGV.size == 0
24
+ puts opts.to_s
25
+ exit
26
+ end
27
+
28
+ peptide_centric_db = ARGV.first
29
+
30
+ def note_files(files)
31
+ files.each do |file| puts "WROTE: #{file}" end
32
+ end
33
+
34
+ klass = Ms::ErrorRate::Sbv
35
+ prot_klass = Ms::ErrorRate::Sbv::ProteinBased
36
+ pep_klass = Ms::ErrorRate::Sbv::PeptideBased
37
+
38
+ if opt[:tm] # set by --tm: [path to the phobius output file, min segment count]
39
+ index = TransmembraneIndex.new(opt[:tm].first) # NOTE(review): no require for TransmembraneIndex is visible in this script - confirm one of the sbv requires loads it
40
+ # build a hash: protein id => 1 when it has at least <min> certain TM segments, otherwise 0
41
+ protid_to_transmembrane = {}
42
+ regexp = nil
43
+ index.each do |k,v|
44
+ regexp ||= Ms::Fasta.id_regexp(k) # obtained once, from the first key, then reused
45
+ new_key = regexp.match(k)[1] # the first capture group becomes the hash key
46
+ protid_to_transmembrane[new_key] = ((v[:num_certain_transmembrane_segments] >= opt[:tm].last) ? 1 : 0)
47
+ end
48
+
49
+ fnames = prot_klass.generate_hashes( peptide_centric_db, protid_to_transmembrane, {:type_code => "tm_min#{opt[:tm].last}"})
50
+ note_files fnames
51
+ end
52
+
53
+ if opt[:aa] # set by --aa: [amino acid string, min count]
54
+ fnames = pep_klass.generate_hashes( peptide_centric_db, *opt[:aa] )
55
+ note_files fnames
56
+ end
57
+ # --protein-bias may be given several times; each use appends a [name symbol, file] pair
58
+ if opt[:protein_bias].size > 0
59
+ opt[:protein_bias].each do |name, hash_file|
60
+ prot_klass.generate_hashes( peptide_centric_db, hash_file) # NOTE(review): unlike the --tm call this passes a file path rather than a hash, leaves name unused and does not report the written files - confirm this is intended
61
+ end
62
+ end
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'trollop'
4
+ require 'nokogiri'
5
+ require 'set'
6
+
7
+ require 'ms/error_rate/qvalue'
8
+
9
+ opts = Trollop::Parser.new do
10
+ banner %Q{usage: #{File.basename(__FILE__)} <fwd>.xml <decoy>.xml ...
11
+ outputs: <fwd>.phq.csv
12
+ phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
13
+ }
14
+ opt :z_together, "do not group by charge state", :default => false
15
+ end
16
+
17
+ DELIMITER = "\t"
18
+
19
+ opt = opts.parse(ARGV)
20
+ if ARGV.size == 0 || (ARGV.size%2 != 0)
21
+ puts "\n\n!! only even numbers of files accepted (target decoy target decoy ...) !!\n\n" if (ARGV.size%2 != 0)
22
+ opts.educate
23
+ exit
24
+ end
25
+
26
+ files = ARGV.to_a
27
+
28
+ PeptideHit = Struct.new(:aaseq, :charge, :ionscore, :qvalue)
29
+
30
+ # this is a list of high quality peptide hits associated with each group
31
+ peptide_hits_per_file = files.map do |file|
32
+ File.open(file) do |io|
33
+ doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
34
+ # we can work with namespaces, or just remove them ...
35
+ doc.remove_namespaces!
36
+ root = doc.root
37
+ search_hits = root.xpath('//search_hit')
38
+ search_hits.map do |search_hit|
39
+ aaseq = search_hit['peptide']
40
+ ionscore = search_hit.children.find {|node| node.name == 'search_score' && node['name'] == 'ionscore' }['value'].to_f
41
+ charge = search_hit.parent.parent['assumed_charge'].to_i
42
+ PeptideHit.new(aaseq, charge, ionscore)
43
+ end
44
+ end
45
+ end
46
+
47
+ hits_per_target = peptide_hits_per_file.each_slice(2).map do |target_hits, decoy_hits|
48
+ pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_hits, decoy_hits, :z_together => opt[:z_together], &:ionscore)
49
+ target_peptide_hits = pairs.map {|peptide_hit, qvalue| peptide_hit.qvalue = qvalue ; peptide_hit }
50
+ end
51
+
52
+ files.each_slice(2).map(&:first).zip(hits_per_target) do |file, hits|
53
+ newfile = file.chomp(File.extname(file)) + ".phq.tsv"
54
+ File.open(newfile,'w') do |out|
55
+ out.puts %w(aaseq charge qvalue).join(DELIMITER)
56
+ hits.each do |hit|
57
+ out.puts hit.values_at(0,1,3).join(DELIMITER)
58
+ end
59
+ end
60
+ end
61
+
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'ms/fasta'
4
+ require 'transmembrane/phobius.rb'
5
+
6
+ if ARGV.size != 3
7
+ puts "usage: #{File.basename(__FILE__)} <max#tm> phobius_file_short <file>.fasta"
8
+ puts "max#tm = max # of transmembrane sequences allowed to be a non-transmembrane."
9
+ puts ""
10
+ puts "outputs: <file>_NONTM.fasta"
11
+ exit
12
+ end
13
+
14
+ (max_num_tm, phobius_short_file, fasta_db_file) = ARGV
15
+ max_num_tm = max_num_tm.to_i
16
+
17
+ base = fasta_db_file.chomp(File.extname(fasta_db_file))
18
+ outfile = base + "_NONTM.fasta"
19
+
20
+ index = Phobius::Index.new(phobius_short_file)
21
+
22
+ File.open(outfile, 'w') do |out| # copies to outfile every fasta entry with at most max_num_tm certain TM segments
23
+ Ms::Fasta.open(fasta_db_file) do |fasta|
24
+ fasta.each do |entry|
25
+ key = index.reference_to_key(entry.header) # the phobius index is looked up by a key derived from the fasta header
26
+ abort "can't find key: #{key} for #{entry.header}" unless index.key?(key) # an entry missing from the index stops the whole run
27
+ num_tms = index[key][:num_certain_transmembrane_segments]
28
+ if num_tms <= max_num_tm
29
+ out.print entry.to_s
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+
data/bin/qvalues.rb ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'optparse'
4
+ require 'ms/error_rate/qvalue'
5
+
6
+ DEF_EXT = "_flip"
7
+ NORMAL_EXT = 'qval.yml'
8
+
9
+ def print_out(outfile, filenames, headers, target_hits)
10
+ File.open(outfile, 'w') do |out|
11
+ out.print( {'headers' => headers, 'filenames' => filenames, 'data' => target_hits }.to_yaml )
12
+ end
13
+ end
14
+
15
+ opt = {
16
+ :outfile => NORMAL_EXT,
17
+ :min_peptide_length => 9,
18
+ }
19
+
20
+ opts = OptionParser.new do |op|
21
+ op.banner = "usage: #{File.basename(__FILE__)} <target> <decoy> [... (as pairs)]"
22
+ op.separator "or: #{File.basename(__FILE__)} <target>.datp <target>.tab.txt [... (as pairs)]"
23
+ op.separator "for each pair of files"
24
+ op.separator "sorts the peptide hits by score and determines the precision at each hit"
25
+ op.separator ""
26
+ op.separator "writes a yaml file <target>.'#{NORMAL_EXT}' which"
27
+ op.separator "has three keys: 'headers', 'filenames', and 'data'"
28
+ op.separator " headers contains an array showing what is in the data"
29
+ op.separator " filenames: (a hash with two keys holding an array of full path names)"
30
+ op.separator " target:"
31
+ op.separator " decoy:"
32
+ op.separator " data: (an array with the data values)"
33
+ op.separator "headers: <the headers of the hits>"
34
+ op.separator ""
35
+ op.separator "headers guaranteed to have at least: filename, query_title, charge, sequence, qvalue"
36
+ op.separator ""
37
+ op.on("-l", "--min-peptide-length <Int>", Integer, "min num aa's to accept (default: #{opt[:min_peptide_length]})") {|v| opt[:min_peptide_length] = v }
38
+ op.on("--z-together", "combines all charge states for precision calc") {|v| opt[:z_together] = v }
39
+ op.on("-o", "--outfile <name>", "write to specified file") {|v| opt[:outfile] = v }
40
+ op.on("-g", "--group-together", "process all forwards together and all decoys together", "will output to opt[:outfile] unless -o given") {|v| opt[:group_together] = v }
41
+ op.on("-f", "--find-decoy [ext]", "finds the decoy file, default <file>#{DEF_EXT}.<ext>", "obviating the need to specify it on the commandline") do |v|
42
+ if v.is_a? String
43
+ opt[:find_decoy] = v
44
+ else
45
+ opt[:find_decoy] = DEF_EXT
46
+ end
47
+ end
48
+ end
49
+
50
+ opts.parse!
51
+
52
+ if ARGV.size == 0
53
+ puts opts.to_s
54
+ exit
55
+ end
56
+
57
+ target_files = []
58
+ decoy_files = []
59
+ if opt[:find_decoy] # every argument is a target; its decoy is <basename><find_decoy><ext>
60
+ target_files = ARGV.to_a.dup
61
+ decoy_files = target_files.map do |tf|
62
+ ext = File.extname(tf)
63
+ basename = tf.chomp(ext)
64
+ decoy_file = basename + opt[:find_decoy] + ext
65
+ raise ArgumentError, "cannot find #{decoy_file}" unless File.exist?(decoy_file)
66
+ decoy_file
67
+ end
68
+ else
69
+ abort "#{File.basename(__FILE__)}: files must be given as <target> <decoy> pairs (got #{ARGV.size})" if ARGV.size.odd? # an odd count would leave the last target with a nil decoy
70
+ ARGV.each_slice(2) do |target, decoy|
71
+ target_files << target ; decoy_files << decoy
72
+ end
73
+ end
74
+
75
+ require 'ms/error_rate/qvalue/mascot'
76
+ require 'ms/error_rate/qvalue/mascot/percolator'
77
+
78
+ mascot_percolator = (File.extname(target_files.first) == '.datp')
79
+ headers = Ms::ErrorRate::Qvalue::Mascot::MEMBERS.map(&:to_s)
80
+ if opt[:group_together]
81
+ outfile = opt[:outfile]
82
+ if mascot_percolator
83
+ filenames = { 'target' => target_files, 'decoy' => decoy_files }
84
+ # in the case of mascot_percolator, the "target" files are .datp files and
85
+ # "decoy" files the .tab.txt files
86
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues( target_files, decoy_files, opt).sort_by(&:qvalue)
87
+ else
88
+ filenames = { 'target' => target_files, 'decoy' => decoy_files }
89
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues(target_files, decoy_files, opt).sort_by(&:qvalue)
90
+ end
91
+ print_out(outfile, filenames, headers, target_hits)
92
+ else
93
+ target_files.zip(decoy_files) do |target_file, decoy_file|
94
+ if mascot_percolator
95
+ filenames = { 'datp' => [target_file], 'tab_txt' => [decoy_file] }
96
+ target_hits = Ms::ErrorRate::Qvalue::Mascot::Percolator.qvalues([target_file], [decoy_file], opt).sort_by(&:qvalue)
97
+ else
98
+ filenames = { 'target' => [target_file], 'decoy' => [decoy_file] }
99
+ target_hits = Ms::ErrorRate::Qvalue::Mascot.qvalues([target_file], [decoy_file], opt).sort_by(&:qvalue)
100
+ end
101
+ base = target_file.chomp(File.extname(target_file))
102
+ outfile = base + '.' + NORMAL_EXT
103
+ print_out(outfile, filenames, headers, target_hits)
104
+ end
105
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate/qvalue'
4
+
5
+ Hit = Struct.new(:score, :charge)
6
+
7
+ describe 'calculating q-values' do
8
+
9
+ before do
10
+ scores = [14,15,13,12,11]
11
+ qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]
12
+ @target_hits = scores.zip(Array.new(scores.size, 2)).map {|pair| Hit.new(*pair) }
13
+ @decoy_hits = scores.zip(Array.new(scores.size, 2)).map {|pair| Hit.new(pair.first-0.5, pair.last) }
14
+ @qval_by_hit = {}
15
+ @target_hits.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
16
+ end
17
+
18
+ it 'can calculate qvalues on target deccoy sets' do
19
+ pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
20
+ pairs.each do |hit, qval|
21
+ @qval_by_hit[hit].should.be.close(qval, 0.00000001)
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate'
4
+ require 'ostruct'
5
+
6
+ xdescribe 'not quite sure what this is' do
7
+
8
+ it 'calculates bayesian probabilities' do
9
+ # C = is a correct ID
10
+ # T = transmembrane content
11
+ # Y = cysteine content
12
+ # A = abundance
13
+ # p(C|T,Y,A) = p(T|C)p(Y|C)p(A|C)p(C) / p(T)p(Y)p(A)
14
+ peps.map do |pep|
15
+ # what is the probability of that un-transmembraneyness being correct?
16
+ # what is the probability of that un-cysteineness being correct?
17
+ # what is the probability of that high abundanceness being correct?
18
+ pep.bayes_probs.reduce(prob_being_correct) do |prob|
19
+ end
20
+ p_correct = pep.prior_prob_correct
21
+ pep.not_transmembrane? * pep.not_cysteine? * pep.not_low_abundance?
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'spec/more'
3
+
4
+ Bacon.summary_on_exit
5
+
6
+ TESTFILES = File.dirname(__FILE__) + "/testfiles"
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-error_rate
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 7
9
+ version: 0.0.7
10
+ platform: ruby
11
+ authors:
12
+ - John T. Prince
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-03-28 00:00:00 -06:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: ms-core
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 0
31
+ - 2
32
+ version: 0.0.2
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: ms-fasta
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 0
45
+ - 2
46
+ - 3
47
+ version: 0.2.3
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: spec-more
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ description: aids for creating and calculating error rates using target-decoy searches and sample validation.
64
+ email: jtprince@gmail.com
65
+ executables:
66
+ - error_rate
67
+ - fasta_to_nuclear.rb
68
+ - fasta_to_peptide_centric_db.rb
69
+ - fasta_to_phobius.rb
70
+ - generate_sbv_input_hashes.rb
71
+ - mascot_pepxml_to_peptide_hit_qvalues.rb
72
+ - phobius_to_nontransmembrane.rb
73
+ - qvalues.rb
74
+ extensions: []
75
+
76
+ extra_rdoc_files:
77
+ - LICENSE
78
+ - README.rdoc
79
+ files:
80
+ - VERSION
81
+ - LICENSE
82
+ - README.rdoc
83
+ - spec/ms/error_rate/qvalue_spec.rb
84
+ - spec/ms/error_rate_spec.rb
85
+ - spec/spec_helper.rb
86
+ - bin/error_rate
87
+ - bin/fasta_to_nuclear.rb
88
+ - bin/fasta_to_peptide_centric_db.rb
89
+ - bin/fasta_to_phobius.rb
90
+ - bin/generate_sbv_input_hashes.rb
91
+ - bin/mascot_pepxml_to_peptide_hit_qvalues.rb
92
+ - bin/phobius_to_nontransmembrane.rb
93
+ - bin/qvalues.rb
94
+ has_rdoc: true
95
+ homepage: http://jtprince.github.com/ms-error_rate
96
+ licenses: []
97
+
98
+ post_install_message:
99
+ rdoc_options: []
100
+
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ segments:
117
+ - 0
118
+ version: "0"
119
+ requirements: []
120
+
121
+ rubyforge_project: mspire
122
+ rubygems_version: 1.3.7
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: An mspire library for calculating error rates in MS/MS identifications (FDRs).
126
+ test_files:
127
+ - spec/ms/error_rate/qvalue_spec.rb
128
+ - spec/ms/error_rate_spec.rb
129
+ - spec/spec_helper.rb