mspire 0.6.21 → 0.6.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/ms/error_rate/decoy.rb +40 -0
- data/lib/ms/error_rate/qvalue.rb +81 -0
- data/lib/ms/spectrum.rb +1 -1
- data/mspire.gemspec +6 -2
- data/spec/ms/error_rate/qvalue_spec.rb +35 -0
- data/spec/ms/mzml_spec.rb +2 -4
- metadata +19 -16
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.21
|
1
|
+
0.6.22
|
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
module MS
  module ErrorRate
    # Error-rate estimates computed from counts of target and decoy hits.
    module Decoy
      module_function

      # Returns the estimated precision of the target hits: the estimated
      # number of true positives divided by the number of target hits.  The
      # number of false target hits is estimated as (num_decoy * pi_not), so
      #
      #     precision = (num_target - num_decoy * pi_not) / num_target
      #
      # pi_not scales the decoy count into the estimated number of false hits
      # in the target set.  A data set with a small fraction of true hits will
      # have a pi_not close to 1.  A data set where 40% of the hits are correct
      # should have a pi_not of 0.6.
      # For instance, Spivak uses a fixed pi_not of 0.9 in J. Proteome Res.,
      # 2009, 8 (7), pp 3737–3745
      #
      # num_target and num_decoy are converted with #to_f.  When there are no
      # target hits the ratio is undefined; in that case 0.0 is returned if
      # there is at least one decoy hit and 1.0 otherwise.  The value is not
      # clamped, so it is negative when num_decoy * pi_not > num_target.
      def precision(num_target, num_decoy, pi_not=1.0)
        num_target_f = num_target.to_f
        num_decoy_f = num_decoy.to_f
        if num_target_f == 0.0
          num_decoy_f > 0.0 ? 0.0 : 1.0
        else
          (num_target_f - (num_decoy_f * pi_not)) / num_target_f
        end
      end

      # the false positive predictive rate (sometimes called the false
      # positive rate).  This is 1 - precision, computed with the same
      # arguments (including the caller's pi_not) that #precision takes.
      def fppr(num_target, num_decoy, pi_not=1.0)
        # pass pi_not through as-is; writing `pi_not=1.0` here would be an
        # assignment expression that discards the caller's value
        1.0 - precision(num_target, num_decoy, pi_not)
      end

      extend(self)
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ms/error_rate/decoy'
|
3
|
+
|
4
|
+
module MS

  module ErrorRate
    # For generating and working with q-value calculations.  The q-value is
    # the global false discovery rate when accepting that particular ID.  We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to prefer
    # Storey when possible] ).  The main point is that we sort and threshold
    # based on a global FDR.
    module Qvalue
      module_function

      # Computes a q-value for every target hit from a target hit list and a
      # decoy hit list.
      #
      # Returns an array of [hit, qvalue] pairs (target hits only).  Within
      # each group that is processed the pairs run from best hit to worst hit,
      # so the result is not necessarily in the order of target_hits.
      #
      # sorting (block):: returns the sort key for a hit; sorting on that key
      #                   must order hits from worst to best.  By default the
      #                   key is hit.score.
      # opts::
      #   :z_together (default false):: when false (or nil) all hits are
      #       processed as one group regardless of charge; when true the hits
      #       are grouped by hit.charge and q-values are computed separately
      #       within each charge group.
      #       NOTE(review): the option name suggests the opposite meaning
      #       (true == all charges together) -- confirm the intent before
      #       changing the branch, since the default behavior depends on it.
      #   other options are passed through to mixed_target_decoy
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new(target_hits)

        # Proc.new doesn't do arity checking
        hit_with_qvalue_pairs = Proc.new do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          # use names distinct from the method parameters so that the closure
          # does not overwrite the caller-supplied target_hits
          (sorted_targets, qvalues) = MS::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          sorted_targets.zip(qvalues)
        end

        all_together = target_hits + decoy_hits
        if !opts[:z_together]
          hit_with_qvalue_pairs.call(all_together)
        else
          # one q-value calculation per charge, results concatenated
          all_together.group_by(&:charge).flat_map do |_charge, hits|
            hit_with_qvalue_pairs.call(hits)
          end
        end
      end

      # returns [target_hits, qvalues] (parallel arrays sorted from best hit to
      # worst hit).  expects an array-like object of hits sorted from best to
      # worst hit with decoys interspersed and a target_setlike object that
      # responds to :include? for the hit object.  assumes the hit is a decoy
      # if not found in the target set!
      #
      # The q-value recorded for a target hit is 1.0 - precision, using the
      # number of target and decoy hits seen up to and including that hit.
      #
      # opts::
      #   :monotonic (default true):: when true, each q-value is replaced by
      #       the smallest q-value found at that hit or any worse-scoring hit,
      #       so q-values never decrease from best hit to worst hit.  If
      #       monotonic is false, that guarantee is not respected.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        monotonic = opts[:monotonic]
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            precision = MS::ErrorRate::Decoy.precision(num_target, num_decoy)
            target_hits << hit
            qvalues << (1.0 - precision)
          else
            num_decoy += 1
          end
        end
        if monotonic
          # walk from worst to best score carrying the running minimum
          min_qvalue = qvalues.last
          qvalues = qvalues.reverse.map do |val|
            if min_qvalue < val
              min_qvalue
            else
              min_qvalue = val
              val
            end
          end.reverse
        end
        [target_hits, qvalues]
      end

    end
  end
end
|
data/lib/ms/spectrum.rb
CHANGED
@@ -45,7 +45,7 @@ module MS
|
|
45
45
|
# number of spectra
|
46
46
|
# :return_data => false returns a parallel array containing
|
47
47
|
# the peaks associated with each returned point
|
48
|
-
# :split => :share | :greedy_y
|
48
|
+
# :split => false | :share | :greedy_y see MS::Peak#split
|
49
49
|
#
|
50
50
|
# The binning algorithm is the fastest possible algorithm that would allow
|
51
51
|
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
data/mspire.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mspire"
|
8
|
-
s.version = "0.6.21"
|
8
|
+
s.version = "0.6.22"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John T. Prince", "Simon Chiang"]
|
12
|
-
s.date = "2012-02-
|
12
|
+
s.date = "2012-02-29"
|
13
13
|
s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
|
14
14
|
s.email = "jtprince@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -33,6 +33,8 @@ Gem::Specification.new do |s|
|
|
33
33
|
"lib/ms/cv/param.rb",
|
34
34
|
"lib/ms/cv/paramable.rb",
|
35
35
|
"lib/ms/digester.rb",
|
36
|
+
"lib/ms/error_rate/decoy.rb",
|
37
|
+
"lib/ms/error_rate/qvalue.rb",
|
36
38
|
"lib/ms/fasta.rb",
|
37
39
|
"lib/ms/ident.rb",
|
38
40
|
"lib/ms/ident/peptide.rb",
|
@@ -106,12 +108,14 @@ Gem::Specification.new do |s|
|
|
106
108
|
"lib/obo/unit.rb",
|
107
109
|
"lib/openany.rb",
|
108
110
|
"lib/write_file_or_string.rb",
|
111
|
+
"mspire.gemspec",
|
109
112
|
"obo/ims.obo",
|
110
113
|
"obo/ms.obo",
|
111
114
|
"obo/unit.obo",
|
112
115
|
"spec/bin_spec.rb",
|
113
116
|
"spec/ms/cv/param_spec.rb",
|
114
117
|
"spec/ms/digester_spec.rb",
|
118
|
+
"spec/ms/error_rate/qvalue_spec.rb",
|
115
119
|
"spec/ms/fasta_spec.rb",
|
116
120
|
"spec/ms/ident/peptide/db_spec.rb",
|
117
121
|
"spec/ms/ident/pepxml/sample_enzyme_spec.rb",
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/error_rate/qvalue'
|
4
|
+
|
5
|
+
# Minimal hit objects for the specs: one exposing the default :score key and
# one exposing a differently named score attribute.
Hit = Struct.new(:score, :charge)
HitWeird = Struct.new(:some_obscure_score, :charge)

describe 'calculating q-values' do

  before do
    charge = 2
    scores = [14,15,13,12,11]
    # expected q-value for the target hit built from the score at the same index
    qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]

    # every decoy scores 0.5 below its corresponding target
    @target_hits = scores.map {|score| Hit.new(score, charge) }
    @decoy_hits = scores.map {|score| Hit.new(score-0.5, charge) }
    @target_hits_weird = scores.map {|score| HitWeird.new(score, charge) }
    @decoy_hits_weird = scores.map {|score| HitWeird.new(score-0.5, charge) }

    # lookup of expected q-value keyed by target hit (both hit types)
    @qval_by_hit = {}
    [@target_hits, @target_hits_weird].each do |hits|
      hits.each_with_index {|hit, index| @qval_by_hit[hit] = qvals_expected[index] }
    end
  end

  it 'can calculate qvalues on target/decoy sets (:score is default)' do
    hit_qval_pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
    hit_qval_pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end

  it 'can calculate qvalues on target/decoy sets with custom sorting' do
    hit_qval_pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits_weird, @decoy_hits_weird) {|hit| hit.some_obscure_score }
    hit_qval_pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end
end
|
data/spec/ms/mzml_spec.rb
CHANGED
@@ -77,10 +77,8 @@ describe MS::Mzml do
|
|
77
77
|
end
|
78
78
|
|
79
79
|
it 'writes MS1 and MS2 spectra' do
|
80
|
-
# profile and ms_level 1
|
81
|
-
|
82
|
-
|
83
|
-
spec1 = MS::Mzml::Spectrum.new('scan=1', params: spec_params) do |spec|
|
80
|
+
# params: profile and ms_level 1
|
81
|
+
spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000128', ['MS:1000511', 1]]) do |spec|
|
84
82
|
spec.data_arrays = [[1,2,3], [4,5,6]]
|
85
83
|
spec.scan_list = MS::Mzml::ScanList.new do |sl|
|
86
84
|
scan = MS::Mzml::Scan.new do |scan|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.21
|
4
|
+
version: 0.6.22
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-02-
|
13
|
+
date: 2012-02-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
17
|
-
requirement: &
|
17
|
+
requirement: &20175840 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '1.5'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *20175840
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: bsearch
|
28
|
-
requirement: &
|
28
|
+
requirement: &20175280 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.5.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *20175280
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: andand
|
39
|
-
requirement: &
|
39
|
+
requirement: &20174800 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.1
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *20174800
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: obo
|
50
|
-
requirement: &
|
50
|
+
requirement: &20174320 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.1.0
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *20174320
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: rspec
|
61
|
-
requirement: &
|
61
|
+
requirement: &20173800 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: '2.6'
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *20173800
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: jeweler
|
72
|
-
requirement: &
|
72
|
+
requirement: &20173240 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
@@ -77,10 +77,10 @@ dependencies:
|
|
77
77
|
version: 1.5.2
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *20173240
|
81
81
|
- !ruby/object:Gem::Dependency
|
82
82
|
name: rcov
|
83
|
-
requirement: &
|
83
|
+
requirement: &20172740 !ruby/object:Gem::Requirement
|
84
84
|
none: false
|
85
85
|
requirements:
|
86
86
|
- - ! '>='
|
@@ -88,7 +88,7 @@ dependencies:
|
|
88
88
|
version: '0'
|
89
89
|
type: :development
|
90
90
|
prerelease: false
|
91
|
-
version_requirements: *
|
91
|
+
version_requirements: *20172740
|
92
92
|
description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire,
|
93
93
|
merging of ms-* gems
|
94
94
|
email: jtprince@gmail.com
|
@@ -114,6 +114,8 @@ files:
|
|
114
114
|
- lib/ms/cv/param.rb
|
115
115
|
- lib/ms/cv/paramable.rb
|
116
116
|
- lib/ms/digester.rb
|
117
|
+
- lib/ms/error_rate/decoy.rb
|
118
|
+
- lib/ms/error_rate/qvalue.rb
|
117
119
|
- lib/ms/fasta.rb
|
118
120
|
- lib/ms/ident.rb
|
119
121
|
- lib/ms/ident/peptide.rb
|
@@ -194,6 +196,7 @@ files:
|
|
194
196
|
- spec/bin_spec.rb
|
195
197
|
- spec/ms/cv/param_spec.rb
|
196
198
|
- spec/ms/digester_spec.rb
|
199
|
+
- spec/ms/error_rate/qvalue_spec.rb
|
197
200
|
- spec/ms/fasta_spec.rb
|
198
201
|
- spec/ms/ident/peptide/db_spec.rb
|
199
202
|
- spec/ms/ident/pepxml/sample_enzyme_spec.rb
|