mspire 0.6.21 → 0.6.22
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/ms/error_rate/decoy.rb +40 -0
- data/lib/ms/error_rate/qvalue.rb +81 -0
- data/lib/ms/spectrum.rb +1 -1
- data/mspire.gemspec +6 -2
- data/spec/ms/error_rate/qvalue_spec.rb +35 -0
- data/spec/ms/mzml_spec.rb +2 -4
- metadata +19 -16
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.21
|
1
|
+
0.6.22
|
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
module MS
  module ErrorRate
    # Error-rate estimates derived from counts of target and decoy hits.
    module Decoy
      module_function

      # Estimates the precision (the fraction of target hits that are true
      # positives) from the number of target and decoy hits.
      #
      # The number of false target hits is estimated as num_decoy * pi_not,
      # so pi_not is the ratio of estimated false target hits to decoy hits.
      # A data set with a small fraction of true hits will have a pi_not
      # close to 1.  A data set where 40% of the hits are correct should
      # have a pi_not of 0.6.  For instance, Spivak uses a fixed pi_not of
      # 0.9 in J. Proteome Res., 2009, 8 (7), pp 3737-3745
      #
      # num_target and num_decoy may be anything responding to :to_f.
      #
      # Returns a Float.  With zero target hits the ratio is undefined, so
      # 0.0 is returned if there are any decoy hits and 1.0 otherwise.  The
      # value is not clamped: it is negative whenever the estimated number of
      # false hits exceeds the number of target hits.
      def precision(num_target, num_decoy, pi_not=1.0)
        num_target_f = num_target.to_f
        num_decoy_f = num_decoy.to_f
        if num_target_f == 0.0
          num_decoy_f > 0.0 ? 0.0 : 1.0
        else
          num_true_pos = num_target_f - (num_decoy_f * pi_not)
          num_true_pos / num_target_f
        end
      end

      # the false positive predictive rate (sometimes called the false
      # positive rate).  This is 1 - precision, computed with the same
      # arguments (including pi_not) as #precision.
      def fppr(num_target, num_decoy, pi_not=1.0)
        # pass pi_not through; writing `pi_not=1.0` here would reassign the
        # local and silently discard the caller's value
        1.0 - precision(num_target, num_decoy, pi_not)
      end

      extend(self)
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ms/error_rate/decoy'
|
3
|
+
|
4
|
+
module MS

  module ErrorRate
    # For generating and working with q-value calculations.  The q-value is
    # the global false discovery rate when accepting that particular ID.  We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to
    # prefer Storey when possible]).  The main point is that we sort and
    # threshold based on a global FDR.
    module Qvalue
      module_function

      # Returns an array of [target_hit, qvalue] pairs.
      #
      # opts:
      #   :z_together => true/false (default false).  When true, hits of all
      #     charge states are pooled and q-values are calculated across the
      #     whole set.  When false, hits are grouped by hit.charge and
      #     q-values are calculated independently within each charge state
      #     (so every hit must respond to :charge).
      #   all options are also passed through to mixed_target_decoy
      #     (e.g., :monotonic)
      #
      # The sorting block maps a hit to a sortable score where a larger
      # value is a better hit.  By default, sorting is {|hit| hit.score}.
      #
      # Pairs run from best to worst hit (within each charge state when
      # hits are not pooled).
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new(target_hits)

        # uses its own local names so that the method's target_hits argument
        # is not reassigned through the closure
        pairs_for = lambda do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          targets, qvalues = MS::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          targets.zip(qvalues)
        end

        all_hits = target_hits + decoy_hits
        if opts[:z_together]
          pairs_for.call(all_hits)
        else
          all_hits.group_by(&:charge).values.flat_map {|hits| pairs_for.call(hits) }
        end
      end

      # returns [target_hits, qvalues] (parallel arrays sorted from best hit
      # to worst hit).  expects an array-like object of hits sorted from best
      # to worst hit with decoys interspersed and a target_setlike object
      # that responds to :include? for the hit object.  assumes the hit is a
      # decoy if not found in the target set!
      #
      # opts:
      #   :monotonic => true/false (default true).  When true, each q-value
      #     is replaced by the smallest q-value found at that hit or at any
      #     worse hit, so q-values never decrease going from best to worst.
      #     If false, that guarantee is not respected.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            target_hits << hit
            qvalues << (1.0 - MS::ErrorRate::Decoy.precision(num_target, num_decoy))
          else
            num_decoy += 1
          end
        end
        if opts[:monotonic]
          # walk from worst to best score carrying the running minimum
          running_min = qvalues.last
          from_worst = qvalues.reverse.map do |qvalue|
            running_min = qvalue if qvalue < running_min
            running_min
          end
          qvalues = from_worst.reverse
        end
        [target_hits, qvalues]
      end

    end
  end
end
|
data/lib/ms/spectrum.rb
CHANGED
@@ -45,7 +45,7 @@ module MS
|
|
45
45
|
# number of spectra
|
46
46
|
# :return_data => false returns a parallel array containing
|
47
47
|
# the peaks associated with each returned point
|
48
|
-
# :split => :share | :greedy_y
|
48
|
+
# :split => false | :share | :greedy_y see MS::Peak#split
|
49
49
|
#
|
50
50
|
# The binning algorithm is the fastest possible algorithm that would allow
|
51
51
|
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
data/mspire.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mspire"
|
8
|
-
s.version = "0.6.21"
|
8
|
+
s.version = "0.6.22"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John T. Prince", "Simon Chiang"]
|
12
|
-
s.date = "2012-02-
|
12
|
+
s.date = "2012-02-29"
|
13
13
|
s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
|
14
14
|
s.email = "jtprince@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -33,6 +33,8 @@ Gem::Specification.new do |s|
|
|
33
33
|
"lib/ms/cv/param.rb",
|
34
34
|
"lib/ms/cv/paramable.rb",
|
35
35
|
"lib/ms/digester.rb",
|
36
|
+
"lib/ms/error_rate/decoy.rb",
|
37
|
+
"lib/ms/error_rate/qvalue.rb",
|
36
38
|
"lib/ms/fasta.rb",
|
37
39
|
"lib/ms/ident.rb",
|
38
40
|
"lib/ms/ident/peptide.rb",
|
@@ -106,12 +108,14 @@ Gem::Specification.new do |s|
|
|
106
108
|
"lib/obo/unit.rb",
|
107
109
|
"lib/openany.rb",
|
108
110
|
"lib/write_file_or_string.rb",
|
111
|
+
"mspire.gemspec",
|
109
112
|
"obo/ims.obo",
|
110
113
|
"obo/ms.obo",
|
111
114
|
"obo/unit.obo",
|
112
115
|
"spec/bin_spec.rb",
|
113
116
|
"spec/ms/cv/param_spec.rb",
|
114
117
|
"spec/ms/digester_spec.rb",
|
118
|
+
"spec/ms/error_rate/qvalue_spec.rb",
|
115
119
|
"spec/ms/fasta_spec.rb",
|
116
120
|
"spec/ms/ident/peptide/db_spec.rb",
|
117
121
|
"spec/ms/ident/pepxml/sample_enzyme_spec.rb",
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/error_rate/qvalue'
|
4
|
+
|
5
|
+
# minimal hit objects: one exposing the default :score and one that needs a
# custom sorting block
Hit = Struct.new(:score, :charge)
HitWeird = Struct.new(:some_obscure_score, :charge)

describe 'calculating q-values' do

  before do
    # every decoy scores 0.5 below its target, so targets and decoys
    # alternate when sorted from best to worst
    scores = [14,15,13,12,11]
    # expected q-value for each target, parallel to scores
    qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]
    # all hits share charge 2
    charges = Array.new(scores.size, 2)
    @target_hits = scores.zip(charges).map {|pair| Hit.new(*pair) }
    @decoy_hits = scores.zip(charges).map {|pair| Hit.new(pair.first-0.5, pair.last) }
    @target_hits_weird = scores.zip(charges).map {|pair| HitWeird.new(*pair) }
    @decoy_hits_weird = scores.zip(charges).map {|pair| HitWeird.new(pair.first-0.5, pair.last) }
    @qval_by_hit = {}
    @target_hits.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
    @target_hits_weird.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
  end

  it 'can calculate qvalues on target/decoy sets (:score is default)' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
    # guards against passing vacuously on an empty result
    pairs.size.should == @target_hits.size
    pairs.each do |hit, qval|
      qval.should be_within(0.00000001).of(@qval_by_hit[hit])
    end
  end

  it 'can calculate qvalues on target/decoy sets with custom sorting' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits_weird, @decoy_hits_weird) {|hit| hit.some_obscure_score }
    # guards against passing vacuously on an empty result
    pairs.size.should == @target_hits_weird.size
    pairs.each do |hit, qval|
      qval.should be_within(0.00000001).of(@qval_by_hit[hit])
    end
  end
end
|
data/spec/ms/mzml_spec.rb
CHANGED
@@ -77,10 +77,8 @@ describe MS::Mzml do
|
|
77
77
|
end
|
78
78
|
|
79
79
|
it 'writes MS1 and MS2 spectra' do
|
80
|
-
# profile and ms_level 1
|
81
|
-
|
82
|
-
|
83
|
-
spec1 = MS::Mzml::Spectrum.new('scan=1', params: spec_params) do |spec|
|
80
|
+
# params: profile and ms_level 1
|
81
|
+
spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000128', ['MS:1000511', 1]]) do |spec|
|
84
82
|
spec.data_arrays = [[1,2,3], [4,5,6]]
|
85
83
|
spec.scan_list = MS::Mzml::ScanList.new do |sl|
|
86
84
|
scan = MS::Mzml::Scan.new do |scan|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.21
|
4
|
+
version: 0.6.22
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-02-
|
13
|
+
date: 2012-02-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
17
|
-
requirement: &
|
17
|
+
requirement: &20175840 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '1.5'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *20175840
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: bsearch
|
28
|
-
requirement: &
|
28
|
+
requirement: &20175280 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.5.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *20175280
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: andand
|
39
|
-
requirement: &
|
39
|
+
requirement: &20174800 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.1
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *20174800
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: obo
|
50
|
-
requirement: &
|
50
|
+
requirement: &20174320 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.1.0
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *20174320
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: rspec
|
61
|
-
requirement: &
|
61
|
+
requirement: &20173800 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: '2.6'
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *20173800
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: jeweler
|
72
|
-
requirement: &
|
72
|
+
requirement: &20173240 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
@@ -77,10 +77,10 @@ dependencies:
|
|
77
77
|
version: 1.5.2
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *20173240
|
81
81
|
- !ruby/object:Gem::Dependency
|
82
82
|
name: rcov
|
83
|
-
requirement: &
|
83
|
+
requirement: &20172740 !ruby/object:Gem::Requirement
|
84
84
|
none: false
|
85
85
|
requirements:
|
86
86
|
- - ! '>='
|
@@ -88,7 +88,7 @@ dependencies:
|
|
88
88
|
version: '0'
|
89
89
|
type: :development
|
90
90
|
prerelease: false
|
91
|
-
version_requirements: *
|
91
|
+
version_requirements: *20172740
|
92
92
|
description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire,
|
93
93
|
merging of ms-* gems
|
94
94
|
email: jtprince@gmail.com
|
@@ -114,6 +114,8 @@ files:
|
|
114
114
|
- lib/ms/cv/param.rb
|
115
115
|
- lib/ms/cv/paramable.rb
|
116
116
|
- lib/ms/digester.rb
|
117
|
+
- lib/ms/error_rate/decoy.rb
|
118
|
+
- lib/ms/error_rate/qvalue.rb
|
117
119
|
- lib/ms/fasta.rb
|
118
120
|
- lib/ms/ident.rb
|
119
121
|
- lib/ms/ident/peptide.rb
|
@@ -194,6 +196,7 @@ files:
|
|
194
196
|
- spec/bin_spec.rb
|
195
197
|
- spec/ms/cv/param_spec.rb
|
196
198
|
- spec/ms/digester_spec.rb
|
199
|
+
- spec/ms/error_rate/qvalue_spec.rb
|
197
200
|
- spec/ms/fasta_spec.rb
|
198
201
|
- spec/ms/ident/peptide/db_spec.rb
|
199
202
|
- spec/ms/ident/pepxml/sample_enzyme_spec.rb
|